toke.c

   1 /*    toke.c
   2  *
   3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
   4  *    2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, by Larry Wall and others
   5  *
   6  *    You may distribute under the terms of either the GNU General Public
   7  *    License or the Artistic License, as specified in the README file.
   8  *
   9  */
  10
  11 /*
  12  *   "It all comes from here, the stench and the peril."  --Frodo
  13  */
  14
  15 /*
  16  * This file is the lexer for Perl.  It's closely linked to the
  17  * parser, perly.y.
  18  *
  19  * The main routine is yylex(), which returns the next token.
  20  */
  21
  22 #include "EXTERN.h"
  23 #define PERL_IN_TOKE_C
  24 #include "perl.h"
  25
  26 #define new_constant(a,b,c,d,e,f,g)     \
  27         S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
  28
  29 #define yylval  (PL_parser->yylval)
  30
  31 /* YYINITDEPTH -- initial size of the parser's stacks.  */
  32 #define YYINITDEPTH 200
  33
  34 /* XXX temporary backwards compatibility */
  35 #define PL_lex_brackets         (PL_parser->lex_brackets)
  36 #define PL_lex_brackstack       (PL_parser->lex_brackstack)
  37 #define PL_lex_casemods         (PL_parser->lex_casemods)
  38 #define PL_lex_casestack        (PL_parser->lex_casestack)
  39 #define PL_lex_defer            (PL_parser->lex_defer)
  40 #define PL_lex_dojoin           (PL_parser->lex_dojoin)
  41 #define PL_lex_expect           (PL_parser->lex_expect)
  42 #define PL_lex_formbrack        (PL_parser->lex_formbrack)
  43 #define PL_lex_inpat            (PL_parser->lex_inpat)
  44 #define PL_lex_inwhat           (PL_parser->lex_inwhat)
  45 #define PL_lex_op               (PL_parser->lex_op)
  46 #define PL_lex_repl             (PL_parser->lex_repl)
  47 #define PL_lex_starts           (PL_parser->lex_starts)
  48 #define PL_lex_stuff            (PL_parser->lex_stuff)
  49 #define PL_multi_start          (PL_parser->multi_start)
  50 #define PL_multi_open           (PL_parser->multi_open)
  51 #define PL_multi_close          (PL_parser->multi_close)
  52 #define PL_pending_ident        (PL_parser->pending_ident)
  53 #define PL_preambled            (PL_parser->preambled)
  54 #define PL_sublex_info          (PL_parser->sublex_info)
  55 #define PL_linestr              (PL_parser->linestr)
  56 #define PL_expect               (PL_parser->expect)
  57 #define PL_copline              (PL_parser->copline)
  58 #define PL_bufptr               (PL_parser->bufptr)
  59 #define PL_oldbufptr            (PL_parser->oldbufptr)
  60 #define PL_oldoldbufptr         (PL_parser->oldoldbufptr)
  61 #define PL_linestart            (PL_parser->linestart)
  62 #define PL_bufend               (PL_parser->bufend)
  63 #define PL_last_uni             (PL_parser->last_uni)
  64 #define PL_last_lop             (PL_parser->last_lop)
  65 #define PL_last_lop_op          (PL_parser->last_lop_op)
  66 #define PL_lex_state            (PL_parser->lex_state)
  67 #define PL_rsfp                 (PL_parser->rsfp)
  68 #define PL_rsfp_filters         (PL_parser->rsfp_filters)
  69 #define PL_in_my                (PL_parser->in_my)
  70 #define PL_in_my_stash          (PL_parser->in_my_stash)
  71 #define PL_tokenbuf             (PL_parser->tokenbuf)
  72 #define PL_multi_end            (PL_parser->multi_end)
  73 #define PL_error_count          (PL_parser->error_count)
  74
  75 #ifdef PERL_MAD
  76 #  define PL_endwhite           (PL_parser->endwhite)
  77 #  define PL_faketokens         (PL_parser->faketokens)
  78 #  define PL_lasttoke           (PL_parser->lasttoke)
  79 #  define PL_nextwhite          (PL_parser->nextwhite)
  80 #  define PL_realtokenstart     (PL_parser->realtokenstart)
  81 #  define PL_skipwhite          (PL_parser->skipwhite)
  82 #  define PL_thisclose          (PL_parser->thisclose)
  83 #  define PL_thismad            (PL_parser->thismad)
  84 #  define PL_thisopen           (PL_parser->thisopen)
  85 #  define PL_thisstuff          (PL_parser->thisstuff)
  86 #  define PL_thistoken          (PL_parser->thistoken)
  87 #  define PL_thiswhite          (PL_parser->thiswhite)
  88 #  define PL_thiswhite          (PL_parser->thiswhite)
  89 #  define PL_nexttoke           (PL_parser->nexttoke)
  90 #  define PL_curforce           (PL_parser->curforce)
  91 #else
  92 #  define PL_nexttoke           (PL_parser->nexttoke)
  93 #  define PL_nexttype           (PL_parser->nexttype)
  94 #  define PL_nextval            (PL_parser->nextval)
  95 #endif
  96
  97 static int
  98 S_pending_ident(pTHX);
  99
 100 static const char ident_too_long[] = "Identifier too long";
 101 static const char commaless_variable_list[] = "comma-less variable list";
 102
 103 #ifndef PERL_NO_UTF16_FILTER
 104 static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
 105 static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
 106 #endif
 107
 108 #ifdef PERL_MAD
 109 #  define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
 110 #  define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
 111 #else
 112 #  define CURMAD(slot,sv)
 113 #  define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
 114 #endif
 115
 116 #define XFAKEBRACK 128
 117 #define XENUMMASK 127
 118
 119 #ifdef USE_UTF8_SCRIPTS
 120 #   define UTF (!IN_BYTES)
 121 #else
 122 #   define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
 123 #endif
 124
 125 /* In variables named $^X, these are the legal values for X.
 126  * 1999-02-27 mjd-perl-patch@plover.com */
 127 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
 128
 129 /* On MacOS, respect nonbreaking spaces */
 130 #ifdef MACOS_TRADITIONAL
 131 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
 132 #else
 133 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
 134 #endif
 135
 136 /* LEX_* are values for PL_lex_state, the state of the lexer.
 137  * They are arranged oddly so that the guard on the switch statement
 138  * can get by with a single comparison (if the compiler is smart enough).
 139  */
 140
 141 /* #define LEX_NOTPARSING               11 is done in perl.h. */
 142
 143 #define LEX_NORMAL              10 /* normal code (ie not within "...")     */
 144 #define LEX_INTERPNORMAL         9 /* code within a string, eg "$foo[$x+1]" */
 145 #define LEX_INTERPCASEMOD        8 /* expecting a \U, \Q or \E etc          */
 146 #define LEX_INTERPPUSH           7 /* starting a new sublex parse level     */
 147 #define LEX_INTERPSTART          6 /* expecting the start of a $var         */
 148
 149                                    /* at end of code, eg "$x" followed by:  */
 150 #define LEX_INTERPEND            5 /* ... eg not one of [, { or ->          */
 151 #define LEX_INTERPENDMAYBE       4 /* ... eg one of [, { or ->              */
 152
 153 #define LEX_INTERPCONCAT         3 /* expecting anything, eg at start of
 154                                         string or after \E, $foo, etc       */
 155 #define LEX_INTERPCONST          2 /* NOT USED */
 156 #define LEX_FORMLINE             1 /* expecting a format line               */
 157 #define LEX_KNOWNEXT             0 /* next token known; just return it      */
 158
 159
 160 #ifdef DEBUGGING
 161 static const char* const lex_state_names[] = {
 162     "KNOWNEXT",
 163     "FORMLINE",
 164     "INTERPCONST",
 165     "INTERPCONCAT",
 166     "INTERPENDMAYBE",
 167     "INTERPEND",
 168     "INTERPSTART",
 169     "INTERPPUSH",
 170     "INTERPCASEMOD",
 171     "INTERPNORMAL",
 172     "NORMAL"
 173 };
 174 #endif
 175
 176 #ifdef ff_next
 177 #undef ff_next
 178 #endif
 179
 180 #include "keywords.h"
 181
 182 /* CLINE is a macro that ensures PL_copline has a sane value */
 183
 184 #ifdef CLINE
 185 #undef CLINE
 186 #endif
 187 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
 188
 189 #ifdef PERL_MAD
 190 #  define SKIPSPACE0(s) skipspace0(s)
 191 #  define SKIPSPACE1(s) skipspace1(s)
 192 #  define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
 193 #  define PEEKSPACE(s) skipspace2(s,0)
 194 #else
 195 #  define SKIPSPACE0(s) skipspace(s)
 196 #  define SKIPSPACE1(s) skipspace(s)
 197 #  define SKIPSPACE2(s,tsv) skipspace(s)
 198 #  define PEEKSPACE(s) skipspace(s)
 199 #endif
 200
 201 /*
 202  * Convenience functions to return different tokens and prime the
 203  * lexer for the next token.  They all take an argument.
 204  *
 205  * TOKEN        : generic token (used for '(', DOLSHARP, etc)
 206  * OPERATOR     : generic operator
 207  * AOPERATOR    : assignment operator
 208  * PREBLOCK     : beginning the block after an if, while, foreach, ...
 209  * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
 210  * PREREF       : *EXPR where EXPR is not a simple identifier
 211  * TERM         : expression term
 212  * LOOPX        : loop exiting command (goto, last, dump, etc)
 213  * FTST         : file test operator
 214  * FUN0         : zero-argument function
 215  * FUN1         : not used, except for not, which isn't a UNIOP
 216  * BOop         : bitwise or or xor
 217  * BAop         : bitwise and
 218  * SHop         : shift operator
 219  * PWop         : power operator
 220  * PMop         : pattern-matching operator
 221  * Aop          : addition-level operator
 222  * Mop          : multiplication-level operator
 223  * Eop          : equality-testing operator
 224  * Rop          : relational operator <= != gt
 225  *
 226  * Also see LOP and lop() below.
 227  */
 228
 229 #ifdef DEBUGGING /* Serve -DT. */
 230 #   define REPORT(retval) tokereport((I32)retval)
 231 #else
 232 #   define REPORT(retval) (retval)
 233 #endif
 234
 235 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
 236 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
 237 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
 238 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
 239 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
 240 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
 241 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
 242 #define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
 243 #define FTST(f)  return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
 244 #define FUN0(f)  return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
 245 #define FUN1(f)  return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
 246 #define BOop(f)  return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
 247 #define BAop(f)  return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
 248 #define SHop(f)  return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
 249 #define PWop(f)  return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
 250 #define PMop(f)  return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
 251 #define Aop(f)   return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
 252 #define Mop(f)   return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
 253 #define Eop(f)   return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
 254 #define Rop(f)   return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
 255
 256 /* This bit of chicanery makes a unary function followed by
 257  * a parenthesis into a function with one argument, highest precedence.
 258  * The UNIDOR macro is for unary functions that can be followed by the //
 259  * operator (such as C<shift // 0>).
 260  */
 261 #define UNI2(f,x) { \
 262         yylval.ival = f; \
 263         PL_expect = x; \
 264         PL_bufptr = s; \
 265         PL_last_uni = PL_oldbufptr; \
 266         PL_last_lop_op = f; \
 267         if (*s == '(') \
 268             return REPORT( (int)FUNC1 ); \
 269         s = PEEKSPACE(s); \
 270         return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
 271         }
 272 #define UNI(f)    UNI2(f,XTERM)
 273 #define UNIDOR(f) UNI2(f,XTERMORDORDOR)
 274
 275 #define UNIBRACK(f) { \
 276         yylval.ival = f; \
 277         PL_bufptr = s; \
 278         PL_last_uni = PL_oldbufptr; \
 279         if (*s == '(') \
 280             return REPORT( (int)FUNC1 ); \
 281         s = PEEKSPACE(s); \
 282         return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
 283         }
 284
 285 /* grandfather return to old style */
 286 #define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
 287
 288 #ifdef DEBUGGING
 289
 290 /* how to interpret the yylval associated with the token */
 291 enum token_type {
 292     TOKENTYPE_NONE,
 293     TOKENTYPE_IVAL,
 294     TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
 295     TOKENTYPE_PVAL,
 296     TOKENTYPE_OPVAL,
 297     TOKENTYPE_GVVAL
 298 };
 299
 300 static struct debug_tokens {
 301     const int token;
 302     enum token_type type;
 303     const char *name;
 304 } const debug_tokens[] =
 305 {
 306     { ADDOP,            TOKENTYPE_OPNUM,        "ADDOP" },
 307     { ANDAND,           TOKENTYPE_NONE,         "ANDAND" },
 308     { ANDOP,            TOKENTYPE_NONE,         "ANDOP" },
 309     { ANONSUB,          TOKENTYPE_IVAL,         "ANONSUB" },
 310     { ARROW,            TOKENTYPE_NONE,         "ARROW" },
 311     { ASSIGNOP,         TOKENTYPE_OPNUM,        "ASSIGNOP" },
 312     { BITANDOP,         TOKENTYPE_OPNUM,        "BITANDOP" },
 313     { BITOROP,          TOKENTYPE_OPNUM,        "BITOROP" },
 314     { COLONATTR,        TOKENTYPE_NONE,         "COLONATTR" },
 315     { CONTINUE,         TOKENTYPE_NONE,         "CONTINUE" },
 316     { DEFAULT,          TOKENTYPE_NONE,         "DEFAULT" },
 317     { DO,               TOKENTYPE_NONE,         "DO" },
 318     { DOLSHARP,         TOKENTYPE_NONE,         "DOLSHARP" },
 319     { DORDOR,           TOKENTYPE_NONE,         "DORDOR" },
 320     { DOROP,            TOKENTYPE_OPNUM,        "DOROP" },
 321     { DOTDOT,           TOKENTYPE_IVAL,         "DOTDOT" },
 322     { ELSE,             TOKENTYPE_NONE,         "ELSE" },
 323     { ELSIF,            TOKENTYPE_IVAL,         "ELSIF" },
 324     { EQOP,             TOKENTYPE_OPNUM,        "EQOP" },
 325     { FOR,              TOKENTYPE_IVAL,         "FOR" },
 326     { FORMAT,           TOKENTYPE_NONE,         "FORMAT" },
 327     { FUNC,             TOKENTYPE_OPNUM,        "FUNC" },
 328     { FUNC0,            TOKENTYPE_OPNUM,        "FUNC0" },
 329     { FUNC0SUB,         TOKENTYPE_OPVAL,        "FUNC0SUB" },
 330     { FUNC1,            TOKENTYPE_OPNUM,        "FUNC1" },
 331     { FUNCMETH,         TOKENTYPE_OPVAL,        "FUNCMETH" },
 332     { GIVEN,            TOKENTYPE_IVAL,         "GIVEN" },
 333     { HASHBRACK,        TOKENTYPE_NONE,         "HASHBRACK" },
 334     { IF,               TOKENTYPE_IVAL,         "IF" },
 335     { LABEL,            TOKENTYPE_PVAL,         "LABEL" },
 336     { LOCAL,            TOKENTYPE_IVAL,         "LOCAL" },
 337     { LOOPEX,           TOKENTYPE_OPNUM,        "LOOPEX" },
 338     { LSTOP,            TOKENTYPE_OPNUM,        "LSTOP" },
 339     { LSTOPSUB,         TOKENTYPE_OPVAL,        "LSTOPSUB" },
 340     { MATCHOP,          TOKENTYPE_OPNUM,        "MATCHOP" },
 341     { METHOD,           TOKENTYPE_OPVAL,        "METHOD" },
 342     { MULOP,            TOKENTYPE_OPNUM,        "MULOP" },
 343     { MY,               TOKENTYPE_IVAL,         "MY" },
 344     { MYSUB,            TOKENTYPE_NONE,         "MYSUB" },
 345     { NOAMP,            TOKENTYPE_NONE,         "NOAMP" },
 346     { NOTOP,            TOKENTYPE_NONE,         "NOTOP" },
 347     { OROP,             TOKENTYPE_IVAL,         "OROP" },
 348     { OROR,             TOKENTYPE_NONE,         "OROR" },
 349     { PACKAGE,          TOKENTYPE_NONE,         "PACKAGE" },
 350     { PMFUNC,           TOKENTYPE_OPVAL,        "PMFUNC" },
 351     { POSTDEC,          TOKENTYPE_NONE,         "POSTDEC" },
 352     { POSTINC,          TOKENTYPE_NONE,         "POSTINC" },
 353     { POWOP,            TOKENTYPE_OPNUM,        "POWOP" },
 354     { PREDEC,           TOKENTYPE_NONE,         "PREDEC" },
 355     { PREINC,           TOKENTYPE_NONE,         "PREINC" },
 356     { PRIVATEREF,       TOKENTYPE_OPVAL,        "PRIVATEREF" },
 357     { REFGEN,           TOKENTYPE_NONE,         "REFGEN" },
 358     { RELOP,            TOKENTYPE_OPNUM,        "RELOP" },
 359     { SHIFTOP,          TOKENTYPE_OPNUM,        "SHIFTOP" },
 360     { SUB,              TOKENTYPE_NONE,         "SUB" },
 361     { THING,            TOKENTYPE_OPVAL,        "THING" },
 362     { UMINUS,           TOKENTYPE_NONE,         "UMINUS" },
 363     { UNIOP,            TOKENTYPE_OPNUM,        "UNIOP" },
 364     { UNIOPSUB,         TOKENTYPE_OPVAL,        "UNIOPSUB" },
 365     { UNLESS,           TOKENTYPE_IVAL,         "UNLESS" },
 366     { UNTIL,            TOKENTYPE_IVAL,         "UNTIL" },
 367     { USE,              TOKENTYPE_IVAL,         "USE" },
 368     { WHEN,             TOKENTYPE_IVAL,         "WHEN" },
 369     { WHILE,            TOKENTYPE_IVAL,         "WHILE" },
 370     { WORD,             TOKENTYPE_OPVAL,        "WORD" },
 371     { 0,                TOKENTYPE_NONE,         NULL }
 372 };
 373
 374 /* dump the returned token in rv, plus any optional arg in yylval */
 375
 376 STATIC int
 377 S_tokereport(pTHX_ I32 rv)
 378 {
 379     dVAR;
 380     if (DEBUG_T_TEST) {
 381         const char *name = NULL;
 382         enum token_type type = TOKENTYPE_NONE;
 383         const struct debug_tokens *p;
 384         SV* const report = newSVpvs("<== ");
 385
 386         for (p = debug_tokens; p->token; p++) {
 387             if (p->token == (int)rv) {
 388                 name = p->name;
 389                 type = p->type;
 390                 break;
 391             }
 392         }
 393         if (name)
 394             Perl_sv_catpv(aTHX_ report, name);
 395         else if ((char)rv > ' ' && (char)rv < '~')
 396             Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
 397         else if (!rv)
 398             sv_catpvs(report, "EOF");
 399         else
 400             Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
 401         switch (type) {
 402         case TOKENTYPE_NONE:
 403         case TOKENTYPE_GVVAL: /* doesn't appear to be used */
 404             break;
 405         case TOKENTYPE_IVAL:
 406             Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)yylval.ival);
 407             break;
 408         case TOKENTYPE_OPNUM:
 409             Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
 410                                     PL_op_name[yylval.ival]);
 411             break;
 412         case TOKENTYPE_PVAL:
 413             Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
 414             break;
 415         case TOKENTYPE_OPVAL:
 416             if (yylval.opval) {
 417                 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
 418                                     PL_op_name[yylval.opval->op_type]);
 419                 if (yylval.opval->op_type == OP_CONST) {
 420                     Perl_sv_catpvf(aTHX_ report, " %s",
 421                         SvPEEK(cSVOPx_sv(yylval.opval)));
 422                 }
 423
 424             }
 425             else
 426                 sv_catpvs(report, "(opval=null)");
 427             break;
 428         }
 429         PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
 430     };
 431     return (int)rv;
 432 }
 433
 434
 435 /* print the buffer with suitable escapes */
 436
 437 STATIC void
 438 S_printbuf(pTHX_ const char* fmt, const char* s)
 439 {
 440     SV* const tmp = newSVpvs("");
 441     PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
 442     SvREFCNT_dec(tmp);
 443 }
 444
 445 #endif
 446
 447 /*
 448  * S_ao
 449  *
 450  * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
 451  * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
 452  */
 453
 454 STATIC int
 455 S_ao(pTHX_ int toketype)
 456 {
 457     dVAR;
 458     if (*PL_bufptr == '=') {
 459         PL_bufptr++;
 460         if (toketype == ANDAND)
 461             yylval.ival = OP_ANDASSIGN;
 462         else if (toketype == OROR)
 463             yylval.ival = OP_ORASSIGN;
 464         else if (toketype == DORDOR)
 465             yylval.ival = OP_DORASSIGN;
 466         toketype = ASSIGNOP;
 467     }
 468     return toketype;
 469 }
 470
 471 /*
 472  * S_no_op
 473  * When Perl expects an operator and finds something else, no_op
 474  * prints the warning.  It always prints "<something> found where
 475  * operator expected.  It prints "Missing semicolon on previous line?"
 476  * if the surprise occurs at the start of the line.  "do you need to
 477  * predeclare ..." is printed out for code like "sub bar; foo bar $x"
 478  * where the compiler doesn't know if foo is a method call or a function.
 479  * It prints "Missing operator before end of line" if there's nothing
 480  * after the missing operator, or "... before <...>" if there is something
 481  * after the missing operator.
 482  */
 483
 484 STATIC void
 485 S_no_op(pTHX_ const char *what, char *s)
 486 {
 487     dVAR;
 488     char * const oldbp = PL_bufptr;
 489     const bool is_first = (PL_oldbufptr == PL_linestart);
 490
 491     if (!s)
 492         s = oldbp;
 493     else
 494         PL_bufptr = s;
 495     yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
 496     if (ckWARN_d(WARN_SYNTAX)) {
 497         if (is_first)
 498             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 499                     "\t(Missing semicolon on previous line?)\n");
 500         else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
 501             const char *t;
 502             for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
 503                 NOOP;
 504             if (t < PL_bufptr && isSPACE(*t))
 505                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 506                         "\t(Do you need to predeclare %.*s?)\n",
 507                     (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
 508         }
 509         else {
 510             assert(s >= oldbp);
 511             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 512                     "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
 513         }
 514     }
 515     PL_bufptr = oldbp;
 516 }
 517
 518 /*
 519  * S_missingterm
 520  * Complain about missing quote/regexp/heredoc terminator.
 521  * If it's called with NULL then it cauterizes the line buffer.
 522  * If we're in a delimited string and the delimiter is a control
 523  * character, it's reformatted into a two-char sequence like ^C.
 524  * This is fatal.
 525  */
 526
 527 STATIC void
 528 S_missingterm(pTHX_ char *s)
 529 {
 530     dVAR;
 531     char tmpbuf[3];
 532     char q;
 533     if (s) {
 534         char * const nl = strrchr(s,'\n');
 535         if (nl)
 536             *nl = '\0';
 537     }
 538     else if (
 539 #ifdef EBCDIC
 540         iscntrl(PL_multi_close)
 541 #else
 542         PL_multi_close < 32 || PL_multi_close == 127
 543 #endif
 544         ) {
 545         *tmpbuf = '^';
 546         tmpbuf[1] = (char)toCTRL(PL_multi_close);
 547         tmpbuf[2] = '\0';
 548         s = tmpbuf;
 549     }
 550     else {
 551         *tmpbuf = (char)PL_multi_close;
 552         tmpbuf[1] = '\0';
 553         s = tmpbuf;
 554     }
 555     q = strchr(s,'"') ? '\'' : '"';
 556     Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
 557 }
 558
 559 #define FEATURE_IS_ENABLED(name)                                        \
 560         ((0 != (PL_hints & HINT_LOCALIZE_HH))                           \
 561             && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
 562 /*
 563  * S_feature_is_enabled
 564  * Check whether the named feature is enabled.
 565  */
 566 STATIC bool
 567 S_feature_is_enabled(pTHX_ const char *name, STRLEN namelen)
 568 {
 569     dVAR;
 570     HV * const hinthv = GvHV(PL_hintgv);
 571     char he_name[32] = "feature_";
 572     (void) my_strlcpy(&he_name[8], name, 24);
 573
 574     return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
 575 }
 576
 577 /*
 578  * Perl_deprecate
 579  */
 580
 581 void
 582 Perl_deprecate(pTHX_ const char *s)
 583 {
 584     if (ckWARN(WARN_DEPRECATED))
 585         Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
 586 }
 587
 588 void
 589 Perl_deprecate_old(pTHX_ const char *s)
 590 {
 591     /* This function should NOT be called for any new deprecated warnings */
 592     /* Use Perl_deprecate instead                                         */
 593     /*                                                                    */
 594     /* It is here to maintain backward compatibility with the pre-5.8     */
 595     /* warnings category hierarchy. The "deprecated" category used to     */
 596     /* live under the "syntax" category. It is now a top-level category   */
 597     /* in its own right.                                                  */
 598
 599     if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
 600         Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
 601                         "Use of %s is deprecated", s);
 602 }
 603
 604 /*
 605  * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
 606  * utf16-to-utf8-reversed.
 607  */
 608
 609 #ifdef PERL_CR_FILTER
 610 static void
 611 strip_return(SV *sv)
 612 {
 613     register const char *s = SvPVX_const(sv);
 614     register const char * const e = s + SvCUR(sv);
 615     /* outer loop optimized to do nothing if there are no CR-LFs */
 616     while (s < e) {
 617         if (*s++ == '\r' && *s == '\n') {
 618             /* hit a CR-LF, need to copy the rest */
 619             register char *d = s - 1;
 620             *d++ = *s++;
 621             while (s < e) {
 622                 if (*s == '\r' && s[1] == '\n')
 623                     s++;
 624                 *d++ = *s++;
 625             }
 626             SvCUR(sv) -= s - d;
 627             return;
 628         }
 629     }
 630 }
 631
 632 STATIC I32
 633 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
 634 {
 635     const I32 count = FILTER_READ(idx+1, sv, maxlen);
 636     if (count > 0 && !maxlen)
 637         strip_return(sv);
 638     return count;
 639 }
 640 #endif
 641
 642
 643
 644 /*
 645  * Perl_lex_start
 646  *
 647  * Create a parser object and initialise its parser and lexer fields
 648  *
 649  * rsfp       is the opened file handle to read from (if any),
 650  *
 651  * line       holds any initial content already read from the file (or in
 652  *            the case of no file, such as an eval, the whole contents);
 653  *
 654  * new_filter indicates that this is a new file and it shouldn't inherit
 655  *            the filters from the current parser (ie require).
 656  */
 657
 658 void
 659 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, bool new_filter)
 660 {
 661     dVAR;
 662     const char *s = NULL;
 663     STRLEN len;
 664     yy_parser *parser, *oparser;
 665
 666     /* create and initialise a parser */
 667
 668     Newxz(parser, 1, yy_parser);
 669     parser->old_parser = oparser = PL_parser;
 670     PL_parser = parser;
 671
 672     Newx(parser->stack, YYINITDEPTH, yy_stack_frame);
 673     parser->ps = parser->stack;
 674     parser->stack_size = YYINITDEPTH;
 675
 676     parser->stack->state = 0;
 677     parser->yyerrstatus = 0;
 678     parser->yychar = YYEMPTY;           /* Cause a token to be read.  */
 679
 680     /* on scope exit, free this parser and restore any outer one */
 681     SAVEPARSER(parser);
 682     parser->saved_curcop = PL_curcop;
 683
 684     /* initialise lexer state */
 685
 686 #ifdef PERL_MAD
 687     parser->curforce = -1;
 688 #else
 689     parser->nexttoke = 0;
 690 #endif
 691     parser->copline = NOLINE;
 692     parser->lex_state = LEX_NORMAL;
 693     parser->expect = XSTATE;
 694     parser->rsfp = rsfp;
 695     parser->rsfp_filters = (new_filter || !oparser) ? newAV()
 696                 : (AV*)SvREFCNT_inc(oparser->rsfp_filters);
 697
 698     Newx(parser->lex_brackstack, 120, char);
 699     Newx(parser->lex_casestack, 12, char);
 700     *parser->lex_casestack = '\0';
 701
 702     if (line) {
 703         s = SvPV_const(line, len);
 704     } else {
 705         len = 0;
 706     }
 707
 708     if (!len) {
 709         parser->linestr = newSVpvs("\n;");
 710     } else if (SvREADONLY(line) || s[len-1] != ';') {
 711         parser->linestr = newSVsv(line);
 712         if (s[len-1] != ';')
 713             sv_catpvs(parser->linestr, "\n;");
 714     } else {
 715         SvTEMP_off(line);
 716         SvREFCNT_inc_simple_void_NN(line);
 717         parser->linestr = line;
 718     }
 719     parser->oldoldbufptr =
 720         parser->oldbufptr =
 721         parser->bufptr =
 722         parser->linestart = SvPVX(parser->linestr);
 723     parser->bufend = parser->bufptr + SvCUR(parser->linestr);
 724     parser->last_lop = parser->last_uni = NULL;
 725 }
 726
 727
 728 /* delete a parser object */
 729
 730 void
 731 Perl_parser_free(pTHX_  const yy_parser *parser)
 732 {
 733     PL_curcop = parser->saved_curcop;
 734     SvREFCNT_dec(parser->linestr);
 735
 736     if (parser->rsfp == PerlIO_stdin())
 737         PerlIO_clearerr(parser->rsfp);
 738     else if (parser->rsfp && parser->old_parser
 739                           && parser->rsfp != parser->old_parser->rsfp)
 740         PerlIO_close(parser->rsfp);
 741     SvREFCNT_dec(parser->rsfp_filters);
 742
 743     Safefree(parser->stack);
 744     Safefree(parser->lex_brackstack);
 745     Safefree(parser->lex_casestack);
 746     PL_parser = parser->old_parser;
 747     Safefree(parser);
 748 }
 749
 750
 751 /*
 752  * Perl_lex_end
 753  * Finalizer for lexing operations.  Must be called when the parser is
 754  * done with the lexer.
 755  */
 756
 757 void
 758 Perl_lex_end(pTHX)
 759 {
 760     dVAR;
 761     PL_doextract = FALSE;
 762 }
 763
 764 /*
 765  * S_incline
 766  * This subroutine has nothing to do with tilting, whether at windmills
 767  * or pinball tables.  Its name is short for "increment line".  It
 768  * increments the current line number in CopLINE(PL_curcop) and checks
 769  * to see whether the line starts with a comment of the form
 770  *    # line 500 "foo.pm"
 771  * If so, it sets the current line number and file to the values in the comment.
 772  */
 773
 774 STATIC void
 775 S_incline(pTHX_ const char *s)
 776 {
 777     dVAR;
 778     const char *t;
 779     const char *n;
 780     const char *e;
 781
 782     CopLINE_inc(PL_curcop);
 783     if (*s++ != '#')
 784         return;
 785     while (SPACE_OR_TAB(*s))
 786         s++;
 787     if (strnEQ(s, "line", 4))
 788         s += 4;
 789     else
 790         return;
 791     if (SPACE_OR_TAB(*s))
 792         s++;
 793     else
 794         return;
 795     while (SPACE_OR_TAB(*s))
 796         s++;
 797     if (!isDIGIT(*s))
 798         return;
 799
 800     n = s;
 801     while (isDIGIT(*s))
 802         s++;
 803     while (SPACE_OR_TAB(*s))
 804         s++;
 805     if (*s == '"' && (t = strchr(s+1, '"'))) {
 806         s++;
 807         e = t + 1;
 808     }
 809     else {
 810         t = s;
 811         while (!isSPACE(*t))
 812             t++;
 813         e = t;
 814     }
 815     while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
 816         e++;
 817     if (*e != '\n' && *e != '\0')
 818         return;         /* false alarm */
 819
 820     if (t - s > 0) {
 821         const STRLEN len = t - s;
 822 #ifndef USE_ITHREADS
 823         const char * const cf = CopFILE(PL_curcop);
 824         STRLEN tmplen = cf ? strlen(cf) : 0;
 825         if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
 826             /* must copy *{"::_<(eval N)[oldfilename:L]"}
 827              * to *{"::_<newfilename"} */
 828             /* However, the long form of evals is only turned on by the
 829                debugger - usually they're "(eval %lu)" */
 830             char smallbuf[128];
 831             char *tmpbuf;
 832             GV **gvp;
 833             STRLEN tmplen2 = len;
 834             if (tmplen + 2 <= sizeof smallbuf)
 835                 tmpbuf = smallbuf;
 836             else
 837                 Newx(tmpbuf, tmplen + 2, char);
 838             tmpbuf[0] = '_';
 839             tmpbuf[1] = '<';
 840             memcpy(tmpbuf + 2, cf, tmplen);
 841             tmplen += 2;
 842             gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
 843             if (gvp) {
 844                 char *tmpbuf2;
 845                 GV *gv2;
 846
 847                 if (tmplen2 + 2 <= sizeof smallbuf)
 848                     tmpbuf2 = smallbuf;
 849                 else
 850                     Newx(tmpbuf2, tmplen2 + 2, char);
 851
 852                 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
 853                     /* Either they malloc'd it, or we malloc'd it,
 854                        so no prefix is present in ours.  */
 855                     tmpbuf2[0] = '_';
 856                     tmpbuf2[1] = '<';
 857                 }
 858
 859                 memcpy(tmpbuf2 + 2, s, tmplen2);
 860                 tmplen2 += 2;
 861
 862                 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
 863                 if (!isGV(gv2)) {
 864                     gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
 865                     /* adjust ${"::_<newfilename"} to store the new file name */
 866                     GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
 867                     GvHV(gv2) = (HV*)SvREFCNT_inc(GvHV(*gvp));
 868                     GvAV(gv2) = (AV*)SvREFCNT_inc(GvAV(*gvp));
 869                 }
 870
 871                 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
 872             }
 873             if (tmpbuf != smallbuf) Safefree(tmpbuf);
 874         }
 875 #endif
 876         CopFILE_free(PL_curcop);
 877         CopFILE_setn(PL_curcop, s, len);
 878     }
 879     CopLINE_set(PL_curcop, atoi(n)-1);
 880 }
 881
 882 #ifdef PERL_MAD
 883 /* skip space before PL_thistoken */
 884
 885 STATIC char *
 886 S_skipspace0(pTHX_ register char *s)
 887 {
 888     s = skipspace(s);
 889     if (!PL_madskills)
 890         return s;
 891     if (PL_skipwhite) {
 892         if (!PL_thiswhite)
 893             PL_thiswhite = newSVpvs("");
 894         sv_catsv(PL_thiswhite, PL_skipwhite);
 895         sv_free(PL_skipwhite);
 896         PL_skipwhite = 0;
 897     }
 898     PL_realtokenstart = s - SvPVX(PL_linestr);
 899     return s;
 900 }
 901
 902 /* skip space after PL_thistoken */
 903
 904 STATIC char *
 905 S_skipspace1(pTHX_ register char *s)
 906 {
 907     const char *start = s;
 908     I32 startoff = start - SvPVX(PL_linestr);
 909
 910     s = skipspace(s);
 911     if (!PL_madskills)
 912         return s;
 913     start = SvPVX(PL_linestr) + startoff;
 914     if (!PL_thistoken && PL_realtokenstart >= 0) {
 915         const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
 916         PL_thistoken = newSVpvn(tstart, start - tstart);
 917     }
 918     PL_realtokenstart = -1;
 919     if (PL_skipwhite) {
 920         if (!PL_nextwhite)
 921             PL_nextwhite = newSVpvs("");
 922         sv_catsv(PL_nextwhite, PL_skipwhite);
 923         sv_free(PL_skipwhite);
 924         PL_skipwhite = 0;
 925     }
 926     return s;
 927 }
 928
 929 STATIC char *
 930 S_skipspace2(pTHX_ register char *s, SV **svp)
 931 {
 932     char *start;
 933     const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
 934     const I32 startoff = s - SvPVX(PL_linestr);
 935
 936     s = skipspace(s);
 937     PL_bufptr = SvPVX(PL_linestr) + bufptroff;
 938     if (!PL_madskills || !svp)
 939         return s;
 940     start = SvPVX(PL_linestr) + startoff;
 941     if (!PL_thistoken && PL_realtokenstart >= 0) {
 942         char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
 943         PL_thistoken = newSVpvn(tstart, start - tstart);
 944         PL_realtokenstart = -1;
 945     }
 946     if (PL_skipwhite) {
 947         if (!*svp)
 948             *svp = newSVpvs("");
 949         sv_setsv(*svp, PL_skipwhite);
 950         sv_free(PL_skipwhite);
 951         PL_skipwhite = 0;
 952     }
 953
 954     return s;
 955 }
 956 #endif
 957
 958 STATIC void
 959 S_update_debugger_info(pTHX_ SV *orig_sv, const char *buf, STRLEN len)
 960 {
 961     AV *av = CopFILEAVx(PL_curcop);
 962     if (av) {
 963         SV * const sv = newSV_type(SVt_PVMG);
 964         if (orig_sv)
 965             sv_setsv(sv, orig_sv);
 966         else
 967             sv_setpvn(sv, buf, len);
 968         (void)SvIOK_on(sv);
 969         SvIV_set(sv, 0);
 970         av_store(av, (I32)CopLINE(PL_curcop), sv);
 971     }
 972 }
 973
 974 /*
 975  * S_skipspace
 976  * Called to gobble the appropriate amount and type of whitespace.
 977  * Skips comments as well.
 978  */
 979
 980 STATIC char *
 981 S_skipspace(pTHX_ register char *s)
 982 {
 983     dVAR;
 984 #ifdef PERL_MAD
 985     int curoff;
 986     int startoff = s - SvPVX(PL_linestr);
 987
 988     if (PL_skipwhite) {
 989         sv_free(PL_skipwhite);
 990         PL_skipwhite = 0;
 991     }
 992 #endif
 993
 994     if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
 995         while (s < PL_bufend && SPACE_OR_TAB(*s))
 996             s++;
 997 #ifdef PERL_MAD
 998         goto done;
 999 #else
1000         return s;
1001 #endif
1002     }
1003     for (;;) {
1004         STRLEN prevlen;
1005         SSize_t oldprevlen, oldoldprevlen;
1006         SSize_t oldloplen = 0, oldunilen = 0;
1007         while (s < PL_bufend && isSPACE(*s)) {
1008             if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
1009                 incline(s);
1010         }
1011
1012         /* comment */
1013         if (s < PL_bufend && *s == '#') {
1014             while (s < PL_bufend && *s != '\n')
1015                 s++;
1016             if (s < PL_bufend) {
1017                 s++;
1018                 if (PL_in_eval && !PL_rsfp) {
1019                     incline(s);
1020                     continue;
1021                 }
1022             }
1023         }
1024
1025         /* only continue to recharge the buffer if we're at the end
1026          * of the buffer, we're not reading from a source filter, and
1027          * we're in normal lexing mode
1028          */
1029         if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
1030                 PL_lex_state == LEX_FORMLINE)
1031 #ifdef PERL_MAD
1032             goto done;
1033 #else
1034             return s;
1035 #endif
1036
1037         /* try to recharge the buffer */
1038 #ifdef PERL_MAD
1039         curoff = s - SvPVX(PL_linestr);
1040 #endif
1041
1042         if ((s = filter_gets(PL_linestr, PL_rsfp,
1043                              (prevlen = SvCUR(PL_linestr)))) == NULL)
1044         {
1045 #ifdef PERL_MAD
1046             if (PL_madskills && curoff != startoff) {
1047                 if (!PL_skipwhite)
1048                     PL_skipwhite = newSVpvs("");
1049                 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
1050                                         curoff - startoff);
1051             }
1052
1053             /* mustn't throw out old stuff yet if madpropping */
1054             SvCUR(PL_linestr) = curoff;
1055             s = SvPVX(PL_linestr) + curoff;
1056             *s = 0;
1057             if (curoff && s[-1] == '\n')
1058                 s[-1] = ' ';
1059 #endif
1060
1061             /* end of file.  Add on the -p or -n magic */
1062             /* XXX these shouldn't really be added here, can't set PL_faketokens */
1063             if (PL_minus_p) {
1064 #ifdef PERL_MAD
1065                 sv_catpvs(PL_linestr,
1066                          ";}continue{print or die qq(-p destination: $!\\n);}");
1067 #else
1068                 sv_setpvs(PL_linestr,
1069                          ";}continue{print or die qq(-p destination: $!\\n);}");
1070 #endif
1071                 PL_minus_n = PL_minus_p = 0;
1072             }
1073             else if (PL_minus_n) {
1074 #ifdef PERL_MAD
1075                 sv_catpvn(PL_linestr, ";}", 2);
1076 #else
1077                 sv_setpvn(PL_linestr, ";}", 2);
1078 #endif
1079                 PL_minus_n = 0;
1080             }
1081             else
1082 #ifdef PERL_MAD
1083                 sv_catpvn(PL_linestr,";", 1);
1084 #else
1085                 sv_setpvn(PL_linestr,";", 1);
1086 #endif
1087
1088             /* reset variables for next time we lex */
1089             PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
1090                 = SvPVX(PL_linestr)
1091 #ifdef PERL_MAD
1092                 + curoff
1093 #endif
1094                 ;
1095             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
1096             PL_last_lop = PL_last_uni = NULL;
1097
1098             /* Close the filehandle.  Could be from -P preprocessor,
1099              * STDIN, or a regular file.  If we were reading code from
1100              * STDIN (because the commandline held no -e or filename)
1101              * then we don't close it, we reset it so the code can
1102              * read from STDIN too.
1103              */
1104
1105             if (PL_preprocess && !PL_in_eval)
1106                 (void)PerlProc_pclose(PL_rsfp);
1107             else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
1108                 PerlIO_clearerr(PL_rsfp);
1109             else
1110                 (void)PerlIO_close(PL_rsfp);
1111             PL_rsfp = NULL;
1112             return s;
1113         }
1114
1115         /* not at end of file, so we only read another line */
1116         /* make corresponding updates to old pointers, for yyerror() */
1117         oldprevlen = PL_oldbufptr - PL_bufend;
1118         oldoldprevlen = PL_oldoldbufptr - PL_bufend;
1119         if (PL_last_uni)
1120             oldunilen = PL_last_uni - PL_bufend;
1121         if (PL_last_lop)
1122             oldloplen = PL_last_lop - PL_bufend;
1123         PL_linestart = PL_bufptr = s + prevlen;
1124         PL_bufend = s + SvCUR(PL_linestr);
1125         s = PL_bufptr;
1126         PL_oldbufptr = s + oldprevlen;
1127         PL_oldoldbufptr = s + oldoldprevlen;
1128         if (PL_last_uni)
1129             PL_last_uni = s + oldunilen;
1130         if (PL_last_lop)
1131             PL_last_lop = s + oldloplen;
1132         incline(s);
1133
1134         /* debugger active and we're not compiling the debugger code,
1135          * so store the line into the debugger's array of lines
1136          */
1137         if (PERLDB_LINE && PL_curstash != PL_debstash)
1138             update_debugger_info(NULL, PL_bufptr, PL_bufend - PL_bufptr);
1139     }
1140
1141 #ifdef PERL_MAD
1142   done:
1143     if (PL_madskills) {
1144         if (!PL_skipwhite)
1145             PL_skipwhite = newSVpvs("");
1146         curoff = s - SvPVX(PL_linestr);
1147         if (curoff - startoff)
1148             sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
1149                                 curoff - startoff);
1150     }
1151     return s;
1152 #endif
1153 }
1154
1155 /*
1156  * S_check_uni
1157  * Check the unary operators to ensure there's no ambiguity in how they're
1158  * used.  An ambiguous piece of code would be:
1159  *     rand + 5
1160  * This doesn't mean rand() + 5.  Because rand() is a unary operator,
1161  * the +5 is its argument.
1162  */
1163
1164 STATIC void
1165 S_check_uni(pTHX)
1166 {
1167     dVAR;
1168     const char *s;
1169     const char *t;
1170
1171     if (PL_oldoldbufptr != PL_last_uni)
1172         return;
1173     while (isSPACE(*PL_last_uni))
1174         PL_last_uni++;
1175     s = PL_last_uni;
1176     while (isALNUM_lazy_if(s,UTF) || *s == '-')
1177         s++;
1178     if ((t = strchr(s, '(')) && t < PL_bufptr)
1179         return;
1180
1181     if (ckWARN_d(WARN_AMBIGUOUS)){
1182         Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
1183                    "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1184                    (int)(s - PL_last_uni), PL_last_uni);
1185     }
1186 }
1187
1188 /*
1189  * LOP : macro to build a list operator.  Its behaviour has been replaced
1190  * with a subroutine, S_lop() for which LOP is just another name.
1191  */
1192
1193 #define LOP(f,x) return lop(f,x,s)
1194
1195 /*
1196  * S_lop
1197  * Build a list operator (or something that might be one).  The rules:
1198  *  - if we have a next token, then it's a list operator [why?]
1199  *  - if the next thing is an opening paren, then it's a function
1200  *  - else it's a list operator
1201  */
1202
1203 STATIC I32
1204 S_lop(pTHX_ I32 f, int x, char *s)
1205 {
1206     dVAR;
1207     yylval.ival = f;
1208     CLINE;
1209     PL_expect = x;
1210     PL_bufptr = s;
1211     PL_last_lop = PL_oldbufptr;
1212     PL_last_lop_op = (OPCODE)f;
1213 #ifdef PERL_MAD
1214     if (PL_lasttoke)
1215         return REPORT(LSTOP);
1216 #else
1217     if (PL_nexttoke)
1218         return REPORT(LSTOP);
1219 #endif
1220     if (*s == '(')
1221         return REPORT(FUNC);
1222     s = PEEKSPACE(s);
1223     if (*s == '(')
1224         return REPORT(FUNC);
1225     else
1226         return REPORT(LSTOP);
1227 }
1228
1229 #ifdef PERL_MAD
1230  /*
1231  * S_start_force
1232  * Sets up for an eventual force_next().  start_force(0) basically does
1233  * an unshift, while start_force(-1) does a push.  yylex removes items
1234  * on the "pop" end.
1235  */
1236
1237 STATIC void
1238 S_start_force(pTHX_ int where)
1239 {
1240     int i;
1241
1242     if (where < 0)      /* so people can duplicate start_force(PL_curforce) */
1243         where = PL_lasttoke;
1244     assert(PL_curforce < 0 || PL_curforce == where);
1245     if (PL_curforce != where) {
1246         for (i = PL_lasttoke; i > where; --i) {
1247             PL_nexttoke[i] = PL_nexttoke[i-1];
1248         }
1249         PL_lasttoke++;
1250     }
1251     if (PL_curforce < 0)        /* in case of duplicate start_force() */
1252         Zero(&PL_nexttoke[where], 1, NEXTTOKE);
1253     PL_curforce = where;
1254     if (PL_nextwhite) {
1255         if (PL_madskills)
1256             curmad('^', newSVpvs(""));
1257         CURMAD('_', PL_nextwhite);
1258     }
1259 }
1260
1261 STATIC void
1262 S_curmad(pTHX_ char slot, SV *sv)
1263 {
1264     MADPROP **where;
1265
1266     if (!sv)
1267         return;
1268     if (PL_curforce < 0)
1269         where = &PL_thismad;
1270     else
1271         where = &PL_nexttoke[PL_curforce].next_mad;
1272
1273     if (PL_faketokens)
1274         sv_setpvn(sv, "", 0);
1275     else {
1276         if (!IN_BYTES) {
1277             if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1278                 SvUTF8_on(sv);
1279             else if (PL_encoding) {
1280                 sv_recode_to_utf8(sv, PL_encoding);
1281             }
1282         }
1283     }
1284
1285     /* keep a slot open for the head of the list? */
1286     if (slot != '_' && *where && (*where)->mad_key == '^') {
1287         (*where)->mad_key = slot;
1288         sv_free((SV*)((*where)->mad_val));
1289         (*where)->mad_val = (void*)sv;
1290     }
1291     else
1292         addmad(newMADsv(slot, sv), where, 0);
1293 }
1294 #else
1295 #  define start_force(where)    NOOP
1296 #  define curmad(slot, sv)      NOOP
1297 #endif
1298
1299 /*
1300  * S_force_next
1301  * When the lexer realizes it knows the next token (for instance,
1302  * it is reordering tokens for the parser) then it can call S_force_next
1303  * to know what token to return the next time the lexer is called.  Caller
1304  * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1305  * and possibly PL_expect to ensure the lexer handles the token correctly.
1306  */
1307
1308 STATIC void
1309 S_force_next(pTHX_ I32 type)
1310 {
1311     dVAR;
1312 #ifdef PERL_MAD
1313     if (PL_curforce < 0)
1314         start_force(PL_lasttoke);
1315     PL_nexttoke[PL_curforce].next_type = type;
1316     if (PL_lex_state != LEX_KNOWNEXT)
1317         PL_lex_defer = PL_lex_state;
1318     PL_lex_state = LEX_KNOWNEXT;
1319     PL_lex_expect = PL_expect;
1320     PL_curforce = -1;
1321 #else
1322     PL_nexttype[PL_nexttoke] = type;
1323     PL_nexttoke++;
1324     if (PL_lex_state != LEX_KNOWNEXT) {
1325         PL_lex_defer = PL_lex_state;
1326         PL_lex_expect = PL_expect;
1327         PL_lex_state = LEX_KNOWNEXT;
1328     }
1329 #endif
1330 }
1331
1332 STATIC SV *
1333 S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
1334 {
1335     dVAR;
1336     SV * const sv = newSVpvn(start,len);
1337     if (UTF && !IN_BYTES && is_utf8_string((const U8*)start, len))
1338         SvUTF8_on(sv);
1339     return sv;
1340 }
1341
1342 /*
1343  * S_force_word
1344  * When the lexer knows the next thing is a word (for instance, it has
1345  * just seen -> and it knows that the next char is a word char, then
1346  * it calls S_force_word to stick the next word into the PL_nexttoke/val
1347  * lookahead.
1348  *
1349  * Arguments:
1350  *   char *start : buffer position (must be within PL_linestr)
1351  *   int token   : PL_next* will be this type of bare word (e.g., METHOD,WORD)
1352  *   int check_keyword : if true, Perl checks to make sure the word isn't
1353  *       a keyword (do this if the word is a label, e.g. goto FOO)
1354  *   int allow_pack : if true, : characters will also be allowed (require,
1355  *       use, etc. do this)
1356  *   int allow_initial_tick : used by the "sub" lexer only.
1357  */
1358
1359 STATIC char *
1360 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
1361 {
1362     dVAR;
1363     register char *s;
1364     STRLEN len;
1365
1366     start = SKIPSPACE1(start);
1367     s = start;
1368     if (isIDFIRST_lazy_if(s,UTF) ||
1369         (allow_pack && *s == ':') ||
1370         (allow_initial_tick && *s == '\'') )
1371     {
1372         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
1373         if (check_keyword && keyword(PL_tokenbuf, len, 0))
1374             return start;
1375         start_force(PL_curforce);
1376         if (PL_madskills)
1377             curmad('X', newSVpvn(start,s-start));
1378         if (token == METHOD) {
1379             s = SKIPSPACE1(s);
1380             if (*s == '(')
1381                 PL_expect = XTERM;
1382             else {
1383                 PL_expect = XOPERATOR;
1384             }
1385         }
1386         if (PL_madskills)
1387             curmad('g', newSVpvs( "forced" ));
1388         NEXTVAL_NEXTTOKE.opval
1389             = (OP*)newSVOP(OP_CONST,0,
1390                            S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
1391         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
1392         force_next(token);
1393     }
1394     return s;
1395 }
1396
1397 /*
1398  * S_force_ident
1399  * Called when the lexer wants $foo *foo &foo etc, but the program
1400  * text only contains the "foo" portion.  The first argument is a pointer
1401  * to the "foo", and the second argument is the type symbol to prefix.
1402  * Forces the next token to be a "WORD".
1403  * Creates the symbol if it didn't already exist (via gv_fetchpv()).
1404  */
1405
1406 STATIC void
1407 S_force_ident(pTHX_ register const char *s, int kind)
1408 {
1409     dVAR;
1410     if (*s) {
1411         const STRLEN len = strlen(s);
1412         OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
1413         start_force(PL_curforce);
1414         NEXTVAL_NEXTTOKE.opval = o;
1415         force_next(WORD);
1416         if (kind) {
1417             o->op_private = OPpCONST_ENTERED;
1418             /* XXX see note in pp_entereval() for why we forgo typo
1419                warnings if the symbol must be introduced in an eval.
1420                GSAR 96-10-12 */
1421             gv_fetchpvn_flags(s, len,
1422                               PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
1423                               : GV_ADD,
1424                               kind == '$' ? SVt_PV :
1425                               kind == '@' ? SVt_PVAV :
1426                               kind == '%' ? SVt_PVHV :
1427                               SVt_PVGV
1428                               );
1429         }
1430     }
1431 }
1432
1433 NV
1434 Perl_str_to_version(pTHX_ SV *sv)
1435 {
1436     NV retval = 0.0;
1437     NV nshift = 1.0;
1438     STRLEN len;
1439     const char *start = SvPV_const(sv,len);
1440     const char * const end = start + len;
1441     const bool utf = SvUTF8(sv) ? TRUE : FALSE;
1442     while (start < end) {
1443         STRLEN skip;
1444         UV n;
1445         if (utf)
1446             n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1447         else {
1448             n = *(U8*)start;
1449             skip = 1;
1450         }
1451         retval += ((NV)n)/nshift;
1452         start += skip;
1453         nshift *= 1000;
1454     }
1455     return retval;
1456 }
1457
1458 /*
1459  * S_force_version
1460  * Forces the next token to be a version number.
1461  * If the next token appears to be an invalid version number, (e.g. "v2b"),
1462  * and if "guessing" is TRUE, then no new token is created (and the caller
1463  * must use an alternative parsing method).
1464  */
1465
1466 STATIC char *
1467 S_force_version(pTHX_ char *s, int guessing)
1468 {
1469     dVAR;
1470     OP *version = NULL;
1471     char *d;
1472 #ifdef PERL_MAD
1473     I32 startoff = s - SvPVX(PL_linestr);
1474 #endif
1475
1476     s = SKIPSPACE1(s);
1477
1478     d = s;
1479     if (*d == 'v')
1480         d++;
1481     if (isDIGIT(*d)) {
1482         while (isDIGIT(*d) || *d == '_' || *d == '.')
1483             d++;
1484 #ifdef PERL_MAD
1485         if (PL_madskills) {
1486             start_force(PL_curforce);
1487             curmad('X', newSVpvn(s,d-s));
1488         }
1489 #endif
1490         if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
1491             SV *ver;
1492             s = scan_num(s, &yylval);
1493             version = yylval.opval;
1494             ver = cSVOPx(version)->op_sv;
1495             if (SvPOK(ver) && !SvNIOK(ver)) {
1496                 SvUPGRADE(ver, SVt_PVNV);
1497                 SvNV_set(ver, str_to_version(ver));
1498                 SvNOK_on(ver);          /* hint that it is a version */
1499             }
1500         }
1501         else if (guessing) {
1502 #ifdef PERL_MAD
1503             if (PL_madskills) {
1504                 sv_free(PL_nextwhite);  /* let next token collect whitespace */
1505                 PL_nextwhite = 0;
1506                 s = SvPVX(PL_linestr) + startoff;
1507             }
1508 #endif
1509             return s;
1510         }
1511     }
1512
1513 #ifdef PERL_MAD
1514     if (PL_madskills && !version) {
1515         sv_free(PL_nextwhite);  /* let next token collect whitespace */
1516         PL_nextwhite = 0;
1517         s = SvPVX(PL_linestr) + startoff;
1518     }
1519 #endif
1520     /* NOTE: The parser sees the package name and the VERSION swapped */
1521     start_force(PL_curforce);
1522     NEXTVAL_NEXTTOKE.opval = version;
1523     force_next(WORD);
1524
1525     return s;
1526 }
1527
1528 /*
1529  * S_tokeq
1530  * Tokenize a quoted string passed in as an SV.  It finds the next
1531  * chunk, up to end of string or a backslash.  It may make a new
1532  * SV containing that chunk (if HINT_NEW_STRING is on).  It also
1533  * turns \\ into \.
1534  */
1535
1536 STATIC SV *
1537 S_tokeq(pTHX_ SV *sv)
1538 {
1539     dVAR;
1540     register char *s;
1541     register char *send;
1542     register char *d;
1543     STRLEN len = 0;
1544     SV *pv = sv;
1545
1546     if (!SvLEN(sv))
1547         goto finish;
1548
1549     s = SvPV_force(sv, len);
1550     if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
1551         goto finish;
1552     send = s + len;
1553     while (s < send && *s != '\\')
1554         s++;
1555     if (s == send)
1556         goto finish;
1557     d = s;
1558     if ( PL_hints & HINT_NEW_STRING ) {
1559         pv = sv_2mortal(newSVpvn(SvPVX_const(pv), len));
1560         if (SvUTF8(sv))
1561             SvUTF8_on(pv);
1562     }
1563     while (s < send) {
1564         if (*s == '\\') {
1565             if (s + 1 < send && (s[1] == '\\'))
1566                 s++;            /* all that, just for this */
1567         }
1568         *d++ = *s++;
1569     }
1570     *d = '\0';
1571     SvCUR_set(sv, d - SvPVX_const(sv));
1572   finish:
1573     if ( PL_hints & HINT_NEW_STRING )
1574        return new_constant(NULL, 0, "q", sv, pv, "q", 1);
1575     return sv;
1576 }
1577
1578 /*
1579  * Now come three functions related to double-quote context,
1580  * S_sublex_start, S_sublex_push, and S_sublex_done.  They're used when
1581  * converting things like "\u\Lgnat" into ucfirst(lc("gnat")).  They
1582  * interact with PL_lex_state, and create fake ( ... ) argument lists
1583  * to handle functions and concatenation.
1584  * They assume that whoever calls them will be setting up a fake
1585  * join call, because each subthing puts a ',' after it.  This lets
1586  *   "lower \luPpEr"
1587  * become
1588  *  join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1589  *
1590  * (I'm not sure whether the spurious commas at the end of lcfirst's
1591  * arguments and join's arguments are created or not).
1592  */
1593
1594 /*
1595  * S_sublex_start
1596  * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1597  *
1598  * Pattern matching will set PL_lex_op to the pattern-matching op to
1599  * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
1600  *
1601  * OP_CONST and OP_READLINE are easy--just make the new op and return.
1602  *
1603  * Everything else becomes a FUNC.
1604  *
1605  * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1606  * had an OP_CONST or OP_READLINE).  This just sets us up for a
1607  * call to S_sublex_push().
1608  */
1609
1610 STATIC I32
1611 S_sublex_start(pTHX)
1612 {
1613     dVAR;
1614     register const I32 op_type = yylval.ival;
1615
1616     if (op_type == OP_NULL) {
1617         yylval.opval = PL_lex_op;
1618         PL_lex_op = NULL;
1619         return THING;
1620     }
1621     if (op_type == OP_CONST || op_type == OP_READLINE) {
1622         SV *sv = tokeq(PL_lex_stuff);
1623
1624         if (SvTYPE(sv) == SVt_PVIV) {
1625             /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1626             STRLEN len;
1627             const char * const p = SvPV_const(sv, len);
1628             SV * const nsv = newSVpvn(p, len);
1629             if (SvUTF8(sv))
1630                 SvUTF8_on(nsv);
1631             SvREFCNT_dec(sv);
1632             sv = nsv;
1633         }
1634         yylval.opval = (OP*)newSVOP(op_type, 0, sv);
1635         PL_lex_stuff = NULL;
1636         /* Allow <FH> // "foo" */
1637         if (op_type == OP_READLINE)
1638             PL_expect = XTERMORDORDOR;
1639         return THING;
1640     }
1641     else if (op_type == OP_BACKTICK && PL_lex_op) {
1642         /* readpipe() vas overriden */
1643         cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
1644         yylval.opval = PL_lex_op;
1645         PL_lex_op = NULL;
1646         PL_lex_stuff = NULL;
1647         return THING;
1648     }
1649
1650     PL_sublex_info.super_state = PL_lex_state;
1651     PL_sublex_info.sub_inwhat = (U16)op_type;
1652     PL_sublex_info.sub_op = PL_lex_op;
1653     PL_lex_state = LEX_INTERPPUSH;
1654
1655     PL_expect = XTERM;
1656     if (PL_lex_op) {
1657         yylval.opval = PL_lex_op;
1658         PL_lex_op = NULL;
1659         return PMFUNC;
1660     }
1661     else
1662         return FUNC;
1663 }
1664
1665 /*
1666  * S_sublex_push
1667  * Create a new scope to save the lexing state.  The scope will be
1668  * ended in S_sublex_done.  Returns a '(', starting the function arguments
1669  * to the uc, lc, etc. found before.
1670  * Sets PL_lex_state to LEX_INTERPCONCAT.
1671  */
1672
1673 STATIC I32
1674 S_sublex_push(pTHX)
1675 {
1676     dVAR;
1677     ENTER;
1678
1679     PL_lex_state = PL_sublex_info.super_state;
1680     SAVEBOOL(PL_lex_dojoin);
1681     SAVEI32(PL_lex_brackets);
1682     SAVEI32(PL_lex_casemods);
1683     SAVEI32(PL_lex_starts);
1684     SAVEI8(PL_lex_state);
1685     SAVEVPTR(PL_lex_inpat);
1686     SAVEI16(PL_lex_inwhat);
1687     SAVECOPLINE(PL_curcop);
1688     SAVEPPTR(PL_bufptr);
1689     SAVEPPTR(PL_bufend);
1690     SAVEPPTR(PL_oldbufptr);
1691     SAVEPPTR(PL_oldoldbufptr);
1692     SAVEPPTR(PL_last_lop);
1693     SAVEPPTR(PL_last_uni);
1694     SAVEPPTR(PL_linestart);
1695     SAVESPTR(PL_linestr);
1696     SAVEGENERICPV(PL_lex_brackstack);
1697     SAVEGENERICPV(PL_lex_casestack);
1698
1699     PL_linestr = PL_lex_stuff;
1700     PL_lex_stuff = NULL;
1701
1702     PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1703         = SvPVX(PL_linestr);
1704     PL_bufend += SvCUR(PL_linestr);
1705     PL_last_lop = PL_last_uni = NULL;
1706     SAVEFREESV(PL_linestr);
1707
1708     PL_lex_dojoin = FALSE;
1709     PL_lex_brackets = 0;
1710     Newx(PL_lex_brackstack, 120, char);
1711     Newx(PL_lex_casestack, 12, char);
1712     PL_lex_casemods = 0;
1713     *PL_lex_casestack = '\0';
1714     PL_lex_starts = 0;
1715     PL_lex_state = LEX_INTERPCONCAT;
1716     CopLINE_set(PL_curcop, (line_t)PL_multi_start);
1717
1718     PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1719     if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1720         PL_lex_inpat = PL_sublex_info.sub_op;
1721     else
1722         PL_lex_inpat = NULL;
1723
1724     return '(';
1725 }
1726
1727 /*
1728  * S_sublex_done
1729  * Restores lexer state after a S_sublex_push.
1730  */
1731
1732 STATIC I32
1733 S_sublex_done(pTHX)
1734 {
1735     dVAR;
1736     if (!PL_lex_starts++) {
1737         SV * const sv = newSVpvs("");
1738         if (SvUTF8(PL_linestr))
1739             SvUTF8_on(sv);
1740         PL_expect = XOPERATOR;
1741         yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
1742         return THING;
1743     }
1744
1745     if (PL_lex_casemods) {              /* oops, we've got some unbalanced parens */
1746         PL_lex_state = LEX_INTERPCASEMOD;
1747         return yylex();
1748     }
1749
1750     /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
1751     if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1752         PL_linestr = PL_lex_repl;
1753         PL_lex_inpat = 0;
1754         PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1755         PL_bufend += SvCUR(PL_linestr);
1756         PL_last_lop = PL_last_uni = NULL;
1757         SAVEFREESV(PL_linestr);
1758         PL_lex_dojoin = FALSE;
1759         PL_lex_brackets = 0;
1760         PL_lex_casemods = 0;
1761         *PL_lex_casestack = '\0';
1762         PL_lex_starts = 0;
1763         if (SvEVALED(PL_lex_repl)) {
1764             PL_lex_state = LEX_INTERPNORMAL;
1765             PL_lex_starts++;
1766             /*  we don't clear PL_lex_repl here, so that we can check later
1767                 whether this is an evalled subst; that means we rely on the
1768                 logic to ensure sublex_done() is called again only via the
1769                 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
1770         }
1771         else {
1772             PL_lex_state = LEX_INTERPCONCAT;
1773             PL_lex_repl = NULL;
1774         }
1775         return ',';
1776     }
1777     else {
1778 #ifdef PERL_MAD
1779         if (PL_madskills) {
1780             if (PL_thiswhite) {
1781                 if (!PL_endwhite)
1782                     PL_endwhite = newSVpvs("");
1783                 sv_catsv(PL_endwhite, PL_thiswhite);
1784                 PL_thiswhite = 0;
1785             }
1786             if (PL_thistoken)
1787                 sv_setpvn(PL_thistoken,"",0);
1788             else
1789                 PL_realtokenstart = -1;
1790         }
1791 #endif
1792         LEAVE;
1793         PL_bufend = SvPVX(PL_linestr);
1794         PL_bufend += SvCUR(PL_linestr);
1795         PL_expect = XOPERATOR;
1796         PL_sublex_info.sub_inwhat = 0;
1797         return ')';
1798     }
1799 }
1800
1801 /*
1802   scan_const
1803
1804   Extracts a pattern, double-quoted string, or transliteration.  This
1805   is terrifying code.
1806
1807   It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
1808   processing a pattern (PL_lex_inpat is true), a transliteration
1809   (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
1810
1811   Returns a pointer to the character scanned up to. If this is
1812   advanced from the start pointer supplied (i.e. if anything was
1813   successfully parsed), will leave an OP for the substring scanned
1814   in yylval. Caller must intuit reason for not parsing further
1815   by looking at the next characters herself.
1816
1817   In patterns:
1818     backslashes:
1819       double-quoted style: \r and \n
1820       regexp special ones: \D \s
1821       constants: \x31
1822       backrefs: \1
1823       case and quoting: \U \Q \E
1824     stops on @ and $, but not for $ as tail anchor
1825
1826   In transliterations:
1827     characters are VERY literal, except for - not at the start or end
1828     of the string, which indicates a range. If the range is in bytes,
1829     scan_const expands the range to the full set of intermediate
1830     characters. If the range is in utf8, the hyphen is replaced with
1831     a certain range mark which will be handled by pmtrans() in op.c.
1832
1833   In double-quoted strings:
1834     backslashes:
1835       double-quoted style: \r and \n
1836       constants: \x31
1837       deprecated backrefs: \1 (in substitution replacements)
1838       case and quoting: \U \Q \E
1839     stops on @ and $
1840
1841   scan_const does *not* construct ops to handle interpolated strings.
1842   It stops processing as soon as it finds an embedded $ or @ variable
1843   and leaves it to the caller to work out what's going on.
1844
1845   embedded arrays (whether in pattern or not) could be:
1846       @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
1847
1848   $ in double-quoted strings must be the symbol of an embedded scalar.
1849
1850   $ in pattern could be $foo or could be tail anchor.  Assumption:
1851   it's a tail anchor if $ is the last thing in the string, or if it's
1852   followed by one of "()| \r\n\t"
1853
1854   \1 (backreferences) are turned into $1
1855
1856   The structure of the code is
1857       while (there's a character to process) {
1858           handle transliteration ranges
1859           skip regexp comments /(?#comment)/ and codes /(?{code})/
1860           skip #-initiated comments in //x patterns
1861           check for embedded arrays
1862           check for embedded scalars
1863           if (backslash) {
1864               leave intact backslashes from leaveit (below)
1865               deprecate \1 in substitution replacements
1866               handle string-changing backslashes \l \U \Q \E, etc.
1867               switch (what was escaped) {
1868                   handle \- in a transliteration (becomes a literal -)
1869                   handle \132 (octal characters)
1870                   handle \x15 and \x{1234} (hex characters)
1871                   handle \N{name} (named characters)
1872                   handle \cV (control characters)
1873                   handle printf-style backslashes (\f, \r, \n, etc)
1874               } (end switch)
1875           } (end if backslash)
1876     } (end while character to read)
1877
1878 */
1879
1880 STATIC char *
1881 S_scan_const(pTHX_ char *start)
1882 {
1883     dVAR;
1884     register char *send = PL_bufend;            /* end of the constant */
1885     SV *sv = newSV(send - start);               /* sv for the constant */
1886     register char *s = start;                   /* start of the constant */
1887     register char *d = SvPVX(sv);               /* destination for copies */
1888     bool dorange = FALSE;                       /* are we in a translit range? */
1889     bool didrange = FALSE;                      /* did we just finish a range? */
1890     I32  has_utf8 = FALSE;                      /* Output constant is UTF8 */
1891     I32  this_utf8 = UTF;                       /* The source string is assumed to be UTF8 */
1892     UV uv;
1893 #ifdef EBCDIC
1894     UV literal_endpoint = 0;
1895     bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
1896 #endif
1897
1898     if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1899         /* If we are doing a trans and we know we want UTF8 set expectation */
1900         has_utf8   = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1901         this_utf8  = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1902     }
1903
1904
1905     while (s < send || dorange) {
1906         /* get transliterations out of the way (they're most literal) */
1907         if (PL_lex_inwhat == OP_TRANS) {
1908             /* expand a range A-Z to the full set of characters.  AIE! */
1909             if (dorange) {
1910                 I32 i;                          /* current expanded character */
1911                 I32 min;                        /* first character in range */
1912                 I32 max;                        /* last character in range */
1913
1914 #ifdef EBCDIC
1915                 UV uvmax = 0;
1916 #endif
1917
1918                 if (has_utf8
1919 #ifdef EBCDIC
1920                     && !native_range
1921 #endif
1922                     ) {
1923                     char * const c = (char*)utf8_hop((U8*)d, -1);
1924                     char *e = d++;
1925                     while (e-- > c)
1926                         *(e + 1) = *e;
1927                     *c = (char)UTF_TO_NATIVE(0xff);
1928                     /* mark the range as done, and continue */
1929                     dorange = FALSE;
1930                     didrange = TRUE;
1931                     continue;
1932                 }
1933
1934                 i = d - SvPVX_const(sv);                /* remember current offset */
1935 #ifdef EBCDIC
1936                 SvGROW(sv,
1937                        SvLEN(sv) + (has_utf8 ?
1938                                     (512 - UTF_CONTINUATION_MARK +
1939                                      UNISKIP(0x100))
1940                                     : 256));
1941                 /* How many two-byte within 0..255: 128 in UTF-8,
1942                  * 96 in UTF-8-mod. */
1943 #else
1944                 SvGROW(sv, SvLEN(sv) + 256);    /* never more than 256 chars in a range */
1945 #endif
1946                 d = SvPVX(sv) + i;              /* refresh d after realloc */
1947 #ifdef EBCDIC
1948                 if (has_utf8) {
1949                     int j;
1950                     for (j = 0; j <= 1; j++) {
1951                         char * const c = (char*)utf8_hop((U8*)d, -1);
1952                         const UV uv    = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
1953                         if (j)
1954                             min = (U8)uv;
1955                         else if (uv < 256)
1956                             max = (U8)uv;
1957                         else {
1958                             max = (U8)0xff; /* only to \xff */
1959                             uvmax = uv; /* \x{100} to uvmax */
1960                         }
1961                         d = c; /* eat endpoint chars */
1962                      }
1963                 }
1964                else {
1965 #endif
1966                    d -= 2;              /* eat the first char and the - */
1967                    min = (U8)*d;        /* first char in range */
1968                    max = (U8)d[1];      /* last char in range  */
1969 #ifdef EBCDIC
1970                }
1971 #endif
1972
1973                 if (min > max) {
1974                     Perl_croak(aTHX_
1975                                "Invalid range \"%c-%c\" in transliteration operator",
1976                                (char)min, (char)max);
1977                 }
1978
1979 #ifdef EBCDIC
1980                 if (literal_endpoint == 2 &&
1981                     ((isLOWER(min) && isLOWER(max)) ||
1982                      (isUPPER(min) && isUPPER(max)))) {
1983                     if (isLOWER(min)) {
1984                         for (i = min; i <= max; i++)
1985                             if (isLOWER(i))
1986                                 *d++ = NATIVE_TO_NEED(has_utf8,i);
1987                     } else {
1988                         for (i = min; i <= max; i++)
1989                             if (isUPPER(i))
1990                                 *d++ = NATIVE_TO_NEED(has_utf8,i);
1991                     }
1992                 }
1993                 else
1994 #endif
1995                     for (i = min; i <= max; i++)
1996 #ifdef EBCDIC
1997                         if (has_utf8) {
1998                             const U8 ch = (U8)NATIVE_TO_UTF(i);
1999                             if (UNI_IS_INVARIANT(ch))
2000                                 *d++ = (U8)i;
2001                             else {
2002                                 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2003                                 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2004                             }
2005                         }
2006                         else
2007 #endif
2008                             *d++ = (char)i;
2009
2010 #ifdef EBCDIC
2011                 if (uvmax) {
2012                     d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2013                     if (uvmax > 0x101)
2014                         *d++ = (char)UTF_TO_NATIVE(0xff);
2015                     if (uvmax > 0x100)
2016                         d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2017                 }
2018 #endif
2019
2020                 /* mark the range as done, and continue */
2021                 dorange = FALSE;
2022                 didrange = TRUE;
2023 #ifdef EBCDIC
2024                 literal_endpoint = 0;
2025 #endif
2026                 continue;
2027             }
2028
2029             /* range begins (ignore - as first or last char) */
2030             else if (*s == '-' && s+1 < send  && s != start) {
2031                 if (didrange) {
2032                     Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
2033                 }
2034                 if (has_utf8
2035 #ifdef EBCDIC
2036                     && !native_range
2037 #endif
2038                     ) {
2039                     *d++ = (char)UTF_TO_NATIVE(0xff);   /* use illegal utf8 byte--see pmtrans */
2040                     s++;
2041                     continue;
2042                 }
2043                 dorange = TRUE;
2044                 s++;
2045             }
2046             else {
2047                 didrange = FALSE;
2048 #ifdef EBCDIC
2049                 literal_endpoint = 0;
2050                 native_range = TRUE;
2051 #endif
2052             }
2053         }
2054
2055         /* if we get here, we're not doing a transliteration */
2056
2057         /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2058            except for the last char, which will be done separately. */
2059         else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
2060             if (s[2] == '#') {
2061                 while (s+1 < send && *s != ')')
2062                     *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2063             }
2064             else if (s[2] == '{' /* This should match regcomp.c */
2065                     || (s[2] == '?' && s[3] == '{'))
2066             {
2067                 I32 count = 1;
2068                 char *regparse = s + (s[2] == '{' ? 3 : 4);
2069                 char c;
2070
2071                 while (count && (c = *regparse)) {
2072                     if (c == '\\' && regparse[1])
2073                         regparse++;
2074                     else if (c == '{')
2075                         count++;
2076                     else if (c == '}')
2077                         count--;
2078                     regparse++;
2079                 }
2080                 if (*regparse != ')')
2081                     regparse--;         /* Leave one char for continuation. */
2082                 while (s < regparse)
2083                     *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2084             }
2085         }
2086
2087         /* likewise skip #-initiated comments in //x patterns */
2088         else if (*s == '#' && PL_lex_inpat &&
2089           ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
2090             while (s+1 < send && *s != '\n')
2091                 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2092         }
2093
2094         /* check for embedded arrays
2095            (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
2096            */
2097         else if (*s == '@' && s[1]) {
2098             if (isALNUM_lazy_if(s+1,UTF))
2099                 break;
2100             if (strchr(":'{$", s[1]))
2101                 break;
2102             if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2103                 break; /* in regexp, neither @+ nor @- are interpolated */
2104         }
2105
2106         /* check for embedded scalars.  only stop if we're sure it's a
2107            variable.
2108         */
2109         else if (*s == '$') {
2110             if (!PL_lex_inpat)  /* not a regexp, so $ must be var */
2111                 break;
2112             if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
2113                 break;          /* in regexp, $ might be tail anchor */
2114         }
2115
2116         /* End of else if chain - OP_TRANS rejoin rest */
2117
2118         /* backslashes */
2119         if (*s == '\\' && s+1 < send) {
2120             s++;
2121
2122             /* deprecate \1 in strings and substitution replacements */
2123             if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
2124                 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
2125             {
2126                 if (ckWARN(WARN_SYNTAX))
2127                     Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
2128                 *--s = '$';
2129                 break;
2130             }
2131
2132             /* string-change backslash escapes */
2133             if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
2134                 --s;
2135                 break;
2136             }
2137             /* skip any other backslash escapes in a pattern */
2138             else if (PL_lex_inpat) {
2139                 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2140                 goto default_action;
2141             }
2142
2143             /* if we get here, it's either a quoted -, or a digit */
2144             switch (*s) {
2145
2146             /* quoted - in transliterations */
2147             case '-':
2148                 if (PL_lex_inwhat == OP_TRANS) {
2149                     *d++ = *s++;
2150                     continue;
2151                 }
2152                 /* FALL THROUGH */
2153             default:
2154                 {
2155                     if ((isALPHA(*s) || isDIGIT(*s)) &&
2156                         ckWARN(WARN_MISC))
2157                         Perl_warner(aTHX_ packWARN(WARN_MISC),
2158                                     "Unrecognized escape \\%c passed through",
2159                                     *s);
2160                     /* default action is to copy the quoted character */
2161                     goto default_action;
2162                 }
2163
2164             /* \132 indicates an octal constant */
2165             case '0': case '1': case '2': case '3':
2166             case '4': case '5': case '6': case '7':
2167                 {
2168                     I32 flags = 0;
2169                     STRLEN len = 3;
2170                     uv = grok_oct(s, &len, &flags, NULL);
2171                     s += len;
2172                 }
2173                 goto NUM_ESCAPE_INSERT;
2174
2175             /* \x24 indicates a hex constant */
2176             case 'x':
2177                 ++s;
2178                 if (*s == '{') {
2179                     char* const e = strchr(s, '}');
2180                     I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2181                       PERL_SCAN_DISALLOW_PREFIX;
2182                     STRLEN len;
2183
2184                     ++s;
2185                     if (!e) {
2186                         yyerror("Missing right brace on \\x{}");
2187                         continue;
2188                     }
2189                     len = e - s;
2190                     uv = grok_hex(s, &len, &flags, NULL);
2191                     s = e + 1;
2192                 }
2193                 else {
2194                     {
2195                         STRLEN len = 2;
2196                         I32 flags = PERL_SCAN_DISALLOW_PREFIX;
2197                         uv = grok_hex(s, &len, &flags, NULL);
2198                         s += len;
2199                     }
2200                 }
2201
2202               NUM_ESCAPE_INSERT:
2203                 /* Insert oct or hex escaped character.
2204                  * There will always enough room in sv since such
2205                  * escapes will be longer than any UTF-8 sequence
2206                  * they can end up as. */
2207
2208                 /* We need to map to chars to ASCII before doing the tests
2209                    to cover EBCDIC
2210                 */
2211                 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
2212                     if (!has_utf8 && uv > 255) {
2213                         /* Might need to recode whatever we have
2214                          * accumulated so far if it contains any
2215                          * hibit chars.
2216                          *
2217                          * (Can't we keep track of that and avoid
2218                          *  this rescan? --jhi)
2219                          */
2220                         int hicount = 0;
2221                         U8 *c;
2222                         for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
2223                             if (!NATIVE_IS_INVARIANT(*c)) {
2224                                 hicount++;
2225                             }
2226                         }
2227                         if (hicount) {
2228                             const STRLEN offset = d - SvPVX_const(sv);
2229                             U8 *src, *dst;
2230                             d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
2231                             src = (U8 *)d - 1;
2232                             dst = src+hicount;
2233                             d  += hicount;
2234                             while (src >= (const U8 *)SvPVX_const(sv)) {
2235                                 if (!NATIVE_IS_INVARIANT(*src)) {
2236                                     const U8 ch = NATIVE_TO_ASCII(*src);
2237                                     *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
2238                                     *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
2239                                 }
2240                                 else {
2241                                     *dst-- = *src;
2242                                 }
2243                                 src--;
2244                             }
2245                         }
2246                     }
2247
2248                     if (has_utf8 || uv > 255) {
2249                         d = (char*)uvchr_to_utf8((U8*)d, uv);
2250                         has_utf8 = TRUE;
2251                         if (PL_lex_inwhat == OP_TRANS &&
2252                             PL_sublex_info.sub_op) {
2253                             PL_sublex_info.sub_op->op_private |=
2254                                 (PL_lex_repl ? OPpTRANS_FROM_UTF
2255                                              : OPpTRANS_TO_UTF);
2256                         }
2257 #ifdef EBCDIC
2258                         if (uv > 255 && !dorange)
2259                             native_range = FALSE;
2260 #endif
2261                     }
2262                     else {
2263                         *d++ = (char)uv;
2264                     }
2265                 }
2266                 else {
2267                     *d++ = (char) uv;
2268                 }
2269                 continue;
2270
2271             /* \N{LATIN SMALL LETTER A} is a named character */
2272             case 'N':
2273                 ++s;
2274                 if (*s == '{') {
2275                     char* e = strchr(s, '}');
2276                     SV *res;
2277                     STRLEN len;
2278                     const char *str;
2279
2280                     if (!e) {
2281                         yyerror("Missing right brace on \\N{}");
2282                         e = s - 1;
2283                         goto cont_scan;
2284                     }
2285                     if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
2286                         /* \N{U+...} */
2287                         I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2288                           PERL_SCAN_DISALLOW_PREFIX;
2289                         s += 3;
2290                         len = e - s;
2291                         uv = grok_hex(s, &len, &flags, NULL);
2292                         if ( e > s && len != (STRLEN)(e - s) ) {
2293                             uv = 0xFFFD;
2294                         }
2295                         s = e + 1;
2296                         goto NUM_ESCAPE_INSERT;
2297                     }
2298                     res = newSVpvn(s + 1, e - s - 1);
2299                     res = new_constant( NULL, 0, "charnames",
2300                                         res, NULL, s - 2, e - s + 3 );
2301                     if (has_utf8)
2302                         sv_utf8_upgrade(res);
2303                     str = SvPV_const(res,len);
2304 #ifdef EBCDIC_NEVER_MIND
2305                     /* charnames uses pack U and that has been
2306                      * recently changed to do the below uni->native
2307                      * mapping, so this would be redundant (and wrong,
2308                      * the code point would be doubly converted).
2309                      * But leave this in just in case the pack U change
2310                      * gets revoked, but the semantics is still
2311                      * desireable for charnames. --jhi */
2312                     {
2313                          UV uv = utf8_to_uvchr((const U8*)str, 0);
2314
2315                          if (uv < 0x100) {
2316                               U8 tmpbuf[UTF8_MAXBYTES+1], *d;
2317
2318                               d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
2319                               sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
2320                               str = SvPV_const(res, len);
2321                          }
2322                     }
2323 #endif
2324                     if (!has_utf8 && SvUTF8(res)) {
2325                         const char * const ostart = SvPVX_const(sv);
2326                         SvCUR_set(sv, d - ostart);
2327                         SvPOK_on(sv);
2328                         *d = '\0';
2329                         sv_utf8_upgrade(sv);
2330                         /* this just broke our allocation above... */
2331                         SvGROW(sv, (STRLEN)(send - start));
2332                         d = SvPVX(sv) + SvCUR(sv);
2333                         has_utf8 = TRUE;
2334                     }
2335                     if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
2336                         const char * const odest = SvPVX_const(sv);
2337
2338                         SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
2339                         d = SvPVX(sv) + (d - odest);
2340                     }
2341 #ifdef EBCDIC
2342                     if (!dorange)
2343                         native_range = FALSE; /* \N{} is guessed to be Unicode */
2344 #endif
2345                     Copy(str, d, len, char);
2346                     d += len;
2347                     SvREFCNT_dec(res);
2348                   cont_scan:
2349                     s = e + 1;
2350                 }
2351                 else
2352                     yyerror("Missing braces on \\N{}");
2353                 continue;
2354
2355             /* \c is a control character */
2356             case 'c':
2357                 s++;
2358                 if (s < send) {
2359                     U8 c = *s++;
2360 #ifdef EBCDIC
2361                     if (isLOWER(c))
2362                         c = toUPPER(c);
2363 #endif
2364                     *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
2365                 }
2366                 else {
2367                     yyerror("Missing control char name in \\c");
2368                 }
2369                 continue;
2370
2371             /* printf-style backslashes, formfeeds, newlines, etc */
2372             case 'b':
2373                 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
2374                 break;
2375             case 'n':
2376                 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
2377                 break;
2378             case 'r':
2379                 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
2380                 break;
2381             case 'f':
2382                 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
2383                 break;
2384             case 't':
2385                 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
2386                 break;
2387             case 'e':
2388                 *d++ = ASCII_TO_NEED(has_utf8,'\033');
2389                 break;
2390             case 'a':
2391                 *d++ = ASCII_TO_NEED(has_utf8,'\007');
2392                 break;
2393             } /* end switch */
2394
2395             s++;
2396             continue;
2397         } /* end if (backslash) */
2398 #ifdef EBCDIC
2399         else
2400             literal_endpoint++;
2401 #endif
2402
2403     default_action:
2404         /* If we started with encoded form, or already know we want it
2405            and then encode the next character */
2406         if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
2407             STRLEN len  = 1;
2408             const UV nextuv   = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
2409             const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
2410             s += len;
2411             if (need > len) {
2412                 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
2413                 const STRLEN off = d - SvPVX_const(sv);
2414                 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
2415             }
2416             d = (char*)uvchr_to_utf8((U8*)d, nextuv);
2417             has_utf8 = TRUE;
2418 #ifdef EBCDIC
2419             if (uv > 255 && !dorange)
2420                 native_range = FALSE;
2421 #endif
2422         }
2423         else {
2424             *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2425         }
2426     } /* while loop to process each character */
2427
2428     /* terminate the string and set up the sv */
2429     *d = '\0';
2430     SvCUR_set(sv, d - SvPVX_const(sv));
2431     if (SvCUR(sv) >= SvLEN(sv))
2432         Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2433
2434     SvPOK_on(sv);
2435     if (PL_encoding && !has_utf8) {
2436         sv_recode_to_utf8(sv, PL_encoding);
2437         if (SvUTF8(sv))
2438             has_utf8 = TRUE;
2439     }
2440     if (has_utf8) {
2441         SvUTF8_on(sv);
2442         if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2443             PL_sublex_info.sub_op->op_private |=
2444                     (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2445         }
2446     }
2447
2448     /* shrink the sv if we allocated more than we used */
2449     if (SvCUR(sv) + 5 < SvLEN(sv)) {
2450         SvPV_shrink_to_cur(sv);
2451     }
2452
2453     /* return the substring (via yylval) only if we parsed anything */
2454     if (s > PL_bufptr) {
2455         if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
2456             const char *const key = PL_lex_inpat ? "qr" : "q";
2457             const STRLEN keylen = PL_lex_inpat ? 2 : 1;
2458             const char *type;
2459             STRLEN typelen;
2460
2461             if (PL_lex_inwhat == OP_TRANS) {
2462                 type = "tr";
2463                 typelen = 2;
2464             } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
2465                 type = "s";
2466                 typelen = 1;
2467             } else  {
2468                 type = "qq";
2469                 typelen = 2;
2470             }
2471
2472             sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
2473                                 type, typelen);
2474         }
2475         yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2476     } else
2477         SvREFCNT_dec(sv);
2478     return s;
2479 }
2480
2481 /* S_intuit_more
2482  * Returns TRUE if there's more to the expression (e.g., a subscript),
2483  * FALSE otherwise.
2484  *
2485  * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
2486  *
2487  * ->[ and ->{ return TRUE
2488  * { and [ outside a pattern are always subscripts, so return TRUE
2489  * if we're outside a pattern and it's not { or [, then return FALSE
2490  * if we're in a pattern and the first char is a {
2491  *   {4,5} (any digits around the comma) returns FALSE
2492  * if we're in a pattern and the first char is a [
2493  *   [] returns FALSE
2494  *   [SOMETHING] has a funky algorithm to decide whether it's a
2495  *      character class or not.  It has to deal with things like
2496  *      /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
2497  * anything else returns TRUE
2498  */
2499
2500 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
2501
2502 STATIC int
2503 S_intuit_more(pTHX_ register char *s)
2504 {
2505     dVAR;
2506     if (PL_lex_brackets)
2507         return TRUE;
2508     if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
2509         return TRUE;
2510     if (*s != '{' && *s != '[')
2511         return FALSE;
2512     if (!PL_lex_inpat)
2513         return TRUE;
2514
2515     /* In a pattern, so maybe we have {n,m}. */
2516     if (*s == '{') {
2517         s++;
2518         if (!isDIGIT(*s))
2519             return TRUE;
2520         while (isDIGIT(*s))
2521             s++;
2522         if (*s == ',')
2523             s++;
2524         while (isDIGIT(*s))
2525             s++;
2526         if (*s == '}')
2527             return FALSE;
2528         return TRUE;
2529
2530     }
2531
2532     /* On the other hand, maybe we have a character class */
2533
2534     s++;
2535     if (*s == ']' || *s == '^')
2536         return FALSE;
2537     else {
2538         /* this is terrifying, and it works */
2539         int weight = 2;         /* let's weigh the evidence */
2540         char seen[256];
2541         unsigned char un_char = 255, last_un_char;
2542         const char * const send = strchr(s,']');
2543         char tmpbuf[sizeof PL_tokenbuf * 4];
2544
2545         if (!send)              /* has to be an expression */
2546             return TRUE;
2547
2548         Zero(seen,256,char);
2549         if (*s == '$')
2550             weight -= 3;
2551         else if (isDIGIT(*s)) {
2552             if (s[1] != ']') {
2553                 if (isDIGIT(s[1]) && s[2] == ']')
2554                     weight -= 10;
2555             }
2556             else
2557                 weight -= 100;
2558         }
2559         for (; s < send; s++) {
2560             last_un_char = un_char;
2561             un_char = (unsigned char)*s;
2562             switch (*s) {
2563             case '@':
2564             case '&':
2565             case '$':
2566                 weight -= seen[un_char] * 10;
2567                 if (isALNUM_lazy_if(s+1,UTF)) {
2568                     int len;
2569                     scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
2570                     len = (int)strlen(tmpbuf);
2571                     if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
2572                         weight -= 100;
2573                     else
2574                         weight -= 10;
2575                 }
2576                 else if (*s == '$' && s[1] &&
2577                   strchr("[#!%*<>()-=",s[1])) {
2578                     if (/*{*/ strchr("])} =",s[2]))
2579                         weight -= 10;
2580                     else
2581                         weight -= 1;
2582                 }
2583                 break;
2584             case '\\':
2585                 un_char = 254;
2586                 if (s[1]) {
2587                     if (strchr("wds]",s[1]))
2588                         weight += 100;
2589                     else if (seen[(U8)'\''] || seen[(U8)'"'])
2590                         weight += 1;
2591                     else if (strchr("rnftbxcav",s[1]))
2592                         weight += 40;
2593                     else if (isDIGIT(s[1])) {
2594                         weight += 40;
2595                         while (s[1] && isDIGIT(s[1]))
2596                             s++;
2597                     }
2598                 }
2599                 else
2600                     weight += 100;
2601                 break;
2602             case '-':
2603                 if (s[1] == '\\')
2604                     weight += 50;
2605                 if (strchr("aA01! ",last_un_char))
2606                     weight += 30;
2607                 if (strchr("zZ79~",s[1]))
2608                     weight += 30;
2609                 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
2610                     weight -= 5;        /* cope with negative subscript */
2611                 break;
2612             default:
2613                 if (!isALNUM(last_un_char)
2614                     && !(last_un_char == '$' || last_un_char == '@'
2615                          || last_un_char == '&')
2616                     && isALPHA(*s) && s[1] && isALPHA(s[1])) {
2617                     char *d = tmpbuf;
2618                     while (isALPHA(*s))
2619                         *d++ = *s++;
2620                     *d = '\0';
2621                     if (keyword(tmpbuf, d - tmpbuf, 0))
2622                         weight -= 150;
2623                 }
2624                 if (un_char == last_un_char + 1)
2625                     weight += 5;
2626                 weight -= seen[un_char];
2627                 break;
2628             }
2629             seen[un_char]++;
2630         }
2631         if (weight >= 0)        /* probably a character class */
2632             return FALSE;
2633     }
2634
2635     return TRUE;
2636 }
2637
2638 /*
2639  * S_intuit_method
2640  *
2641  * Does all the checking to disambiguate
2642  *   foo bar
2643  * between foo(bar) and bar->foo.  Returns 0 if not a method, otherwise
2644  * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2645  *
2646  * First argument is the stuff after the first token, e.g. "bar".
2647  *
2648  * Not a method if bar is a filehandle.
2649  * Not a method if foo is a subroutine prototyped to take a filehandle.
2650  * Not a method if it's really "Foo $bar"
2651  * Method if it's "foo $bar"
2652  * Not a method if it's really "print foo $bar"
2653  * Method if it's really "foo package::" (interpreted as package->foo)
2654  * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
2655  * Not a method if bar is a filehandle or package, but is quoted with
2656  *   =>
2657  */
2658
2659 STATIC int
2660 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
2661 {
2662     dVAR;
2663     char *s = start + (*start == '$');
2664     char tmpbuf[sizeof PL_tokenbuf];
2665     STRLEN len;
2666     GV* indirgv;
2667 #ifdef PERL_MAD
2668     int soff;
2669 #endif
2670
2671     if (gv) {
2672         if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
2673             return 0;
2674         if (cv) {
2675             if (SvPOK(cv)) {
2676                 const char *proto = SvPVX_const(cv);
2677                 if (proto) {
2678                     if (*proto == ';')
2679                         proto++;
2680                     if (*proto == '*')
2681                         return 0;
2682                 }
2683             }
2684         } else
2685             gv = NULL;
2686     }
2687     s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
2688     /* start is the beginning of the possible filehandle/object,
2689      * and s is the end of it
2690      * tmpbuf is a copy of it
2691      */
2692
2693     if (*start == '$') {
2694         if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
2695                 isUPPER(*PL_tokenbuf))
2696             return 0;
2697 #ifdef PERL_MAD
2698         len = start - SvPVX(PL_linestr);
2699 #endif
2700         s = PEEKSPACE(s);
2701 #ifdef PERL_MAD
2702         start = SvPVX(PL_linestr) + len;
2703 #endif
2704         PL_bufptr = start;
2705         PL_expect = XREF;
2706         return *s == '(' ? FUNCMETH : METHOD;
2707     }
2708     if (!keyword(tmpbuf, len, 0)) {
2709         if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2710             len -= 2;
2711             tmpbuf[len] = '\0';
2712 #ifdef PERL_MAD
2713             soff = s - SvPVX(PL_linestr);
2714 #endif
2715             goto bare_package;
2716         }
2717         indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
2718         if (indirgv && GvCVu(indirgv))
2719             return 0;
2720         /* filehandle or package name makes it a method */
2721         if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
2722 #ifdef PERL_MAD
2723             soff = s - SvPVX(PL_linestr);
2724 #endif
2725             s = PEEKSPACE(s);
2726             if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
2727                 return 0;       /* no assumptions -- "=>" quotes bearword */
2728       bare_package:
2729             start_force(PL_curforce);
2730             NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
2731                                                    newSVpvn(tmpbuf,len));
2732             NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
2733             if (PL_madskills)
2734                 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
2735             PL_expect = XTERM;
2736             force_next(WORD);
2737             PL_bufptr = s;
2738 #ifdef PERL_MAD
2739             PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
2740 #endif
2741             return *s == '(' ? FUNCMETH : METHOD;
2742         }
2743     }
2744     return 0;
2745 }
2746
2747 /*
2748  * S_incl_perldb
2749  * Return a string of Perl code to load the debugger.  If PERL5DB
2750  * is set, it will return the contents of that, otherwise a
2751  * compile-time require of perl5db.pl.
2752  */
2753
2754 STATIC const char*
2755 S_incl_perldb(pTHX)
2756 {
2757     dVAR;
2758     if (PL_perldb) {
2759         const char * const pdb = PerlEnv_getenv("PERL5DB");
2760
2761         if (pdb)
2762             return pdb;
2763         SETERRNO(0,SS_NORMAL);
2764         return "BEGIN { require 'perl5db.pl' }";
2765     }
2766     return "";
2767 }
2768
2769
2770 /* Encoded script support. filter_add() effectively inserts a
2771  * 'pre-processing' function into the current source input stream.
2772  * Note that the filter function only applies to the current source file
2773  * (e.g., it will not affect files 'require'd or 'use'd by this one).
2774  *
2775  * The datasv parameter (which may be NULL) can be used to pass
2776  * private data to this instance of the filter. The filter function
2777  * can recover the SV using the FILTER_DATA macro and use it to
2778  * store private buffers and state information.
2779  *
2780  * The supplied datasv parameter is upgraded to a PVIO type
2781  * and the IoDIRP/IoANY field is used to store the function pointer,
2782  * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
2783  * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2784  * private use must be set using malloc'd pointers.
2785  */
2786
2787 SV *
2788 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
2789 {
2790     dVAR;
2791     if (!funcp)
2792         return NULL;
2793
2794     if (!PL_parser)
2795         return NULL;
2796
2797     if (!PL_rsfp_filters)
2798         PL_rsfp_filters = newAV();
2799     if (!datasv)
2800         datasv = newSV(0);
2801     SvUPGRADE(datasv, SVt_PVIO);
2802     IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
2803     IoFLAGS(datasv) |= IOf_FAKE_DIRP;
2804     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
2805                           FPTR2DPTR(void *, IoANY(datasv)),
2806                           SvPV_nolen(datasv)));
2807     av_unshift(PL_rsfp_filters, 1);
2808     av_store(PL_rsfp_filters, 0, datasv) ;
2809     return(datasv);
2810 }
2811
2812
2813 /* Delete most recently added instance of this filter function. */
2814 void
2815 Perl_filter_del(pTHX_ filter_t funcp)
2816 {
2817     dVAR;
2818     SV *datasv;
2819
2820 #ifdef DEBUGGING
2821     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
2822                           FPTR2DPTR(void*, funcp)));
2823 #endif
2824     if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
2825         return;
2826     /* if filter is on top of stack (usual case) just pop it off */
2827     datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
2828     if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
2829         IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
2830         IoANY(datasv) = (void *)NULL;
2831         sv_free(av_pop(PL_rsfp_filters));
2832
2833         return;
2834     }
2835     /* we need to search for the correct entry and clear it     */
2836     Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
2837 }
2838
2839
2840 /* Invoke the idxth filter function for the current rsfp.        */
2841 /* maxlen 0 = read one text line */
2842 I32
2843 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
2844 {
2845     dVAR;
2846     filter_t funcp;
2847     SV *datasv = NULL;
2848     /* This API is bad. It should have been using unsigned int for maxlen.
2849        Not sure if we want to change the API, but if not we should sanity
2850        check the value here.  */
2851     const unsigned int correct_length
2852         = maxlen < 0 ?
2853 #ifdef PERL_MICRO
2854         0x7FFFFFFF
2855 #else
2856         INT_MAX
2857 #endif
2858         : maxlen;
2859
2860     if (!PL_parser || !PL_rsfp_filters)
2861         return -1;
2862     if (idx > AvFILLp(PL_rsfp_filters)) {       /* Any more filters?    */
2863         /* Provide a default input filter to make life easy.    */
2864         /* Note that we append to the line. This is handy.      */
2865         DEBUG_P(PerlIO_printf(Perl_debug_log,
2866                               "filter_read %d: from rsfp\n", idx));
2867         if (correct_length) {
2868             /* Want a block */
2869             int len ;
2870             const int old_len = SvCUR(buf_sv);
2871
2872             /* ensure buf_sv is large enough */
2873             SvGROW(buf_sv, (STRLEN)(old_len + correct_length)) ;
2874             if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
2875                                    correct_length)) <= 0) {
2876                 if (PerlIO_error(PL_rsfp))
2877                     return -1;          /* error */
2878                 else
2879                     return 0 ;          /* end of file */
2880             }
2881             SvCUR_set(buf_sv, old_len + len) ;
2882         } else {
2883             /* Want a line */
2884             if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2885                 if (PerlIO_error(PL_rsfp))
2886                     return -1;          /* error */
2887                 else
2888                     return 0 ;          /* end of file */
2889             }
2890         }
2891         return SvCUR(buf_sv);
2892     }
2893     /* Skip this filter slot if filter has been deleted */
2894     if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
2895         DEBUG_P(PerlIO_printf(Perl_debug_log,
2896                               "filter_read %d: skipped (filter deleted)\n",
2897                               idx));
2898         return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
2899     }
2900     /* Get function pointer hidden within datasv        */
2901     funcp = DPTR2FPTR(filter_t, IoANY(datasv));
2902     DEBUG_P(PerlIO_printf(Perl_debug_log,
2903                           "filter_read %d: via function %p (%s)\n",
2904                           idx, (void*)datasv, SvPV_nolen_const(datasv)));
2905     /* Call function. The function is expected to       */
2906     /* call "FILTER_READ(idx+1, buf_sv)" first.         */
2907     /* Return: <0:error, =0:eof, >0:not eof             */
2908     return (*funcp)(aTHX_ idx, buf_sv, correct_length);
2909 }
2910
2911 STATIC char *
2912 S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
2913 {
2914     dVAR;
2915 #ifdef PERL_CR_FILTER
2916     if (!PL_rsfp_filters) {
2917         filter_add(S_cr_textfilter,NULL);
2918     }
2919 #endif
2920     if (PL_rsfp_filters) {
2921         if (!append)
2922             SvCUR_set(sv, 0);   /* start with empty line        */
2923         if (FILTER_READ(0, sv, 0) > 0)
2924             return ( SvPVX(sv) ) ;
2925         else
2926             return NULL ;
2927     }
2928     else
2929         return (sv_gets(sv, fp, append));
2930 }
2931
2932 STATIC HV *
2933 S_find_in_my_stash(pTHX_ const char *pkgname, I32 len)
2934 {
2935     dVAR;
2936     GV *gv;
2937
2938     if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
2939         return PL_curstash;
2940
2941     if (len > 2 &&
2942         (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
2943         (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
2944     {
2945         return GvHV(gv);                        /* Foo:: */
2946     }
2947
2948     /* use constant CLASS => 'MyClass' */
2949     gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
2950     if (gv && GvCV(gv)) {
2951         SV * const sv = cv_const_sv(GvCV(gv));
2952         if (sv)
2953             pkgname = SvPV_nolen_const(sv);
2954     }
2955
2956     return gv_stashpv(pkgname, 0);
2957 }
2958
2959 /*
2960  * S_readpipe_override
2961  * Check whether readpipe() is overriden, and generates the appropriate
2962  * optree, provided sublex_start() is called afterwards.
2963  */
2964 STATIC void
2965 S_readpipe_override(pTHX)
2966 {
2967     GV **gvp;
2968     GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
2969     yylval.ival = OP_BACKTICK;
2970     if ((gv_readpipe
2971                 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
2972             ||
2973             ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
2974              && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
2975              && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
2976     {
2977         PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2978             append_elem(OP_LIST,
2979                 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
2980                 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
2981     }
2982 }
2983
2984 #ifdef PERL_MAD
2985  /*
2986  * Perl_madlex
2987  * The intent of this yylex wrapper is to minimize the changes to the
2988  * tokener when we aren't interested in collecting madprops.  It remains
2989  * to be seen how successful this strategy will be...
2990  */
2991
2992 int
2993 Perl_madlex(pTHX)
2994 {
2995     int optype;
2996     char *s = PL_bufptr;
2997
2998     /* make sure PL_thiswhite is initialized */
2999     PL_thiswhite = 0;
3000     PL_thismad = 0;
3001
3002     /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
3003     if (PL_pending_ident)
3004         return S_pending_ident(aTHX);
3005
3006     /* previous token ate up our whitespace? */
3007     if (!PL_lasttoke && PL_nextwhite) {
3008         PL_thiswhite = PL_nextwhite;
3009         PL_nextwhite = 0;
3010     }
3011
3012     /* isolate the token, and figure out where it is without whitespace */
3013     PL_realtokenstart = -1;
3014     PL_thistoken = 0;
3015     optype = yylex();
3016     s = PL_bufptr;
3017     assert(PL_curforce < 0);
3018
3019     if (!PL_thismad || PL_thismad->mad_key == '^') {    /* not forced already? */
3020         if (!PL_thistoken) {
3021             if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
3022                 PL_thistoken = newSVpvs("");
3023             else {
3024                 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
3025                 PL_thistoken = newSVpvn(tstart, s - tstart);
3026             }
3027         }
3028         if (PL_thismad) /* install head */
3029             CURMAD('X', PL_thistoken);
3030     }
3031
3032     /* last whitespace of a sublex? */
3033     if (optype == ')' && PL_endwhite) {
3034         CURMAD('X', PL_endwhite);
3035     }
3036
3037     if (!PL_thismad) {
3038
3039         /* if no whitespace and we're at EOF, bail.  Otherwise fake EOF below. */
3040         if (!PL_thiswhite && !PL_endwhite && !optype) {
3041             sv_free(PL_thistoken);
3042             PL_thistoken = 0;
3043             return 0;
3044         }
3045
3046         /* put off final whitespace till peg */
3047         if (optype == ';' && !PL_rsfp) {
3048             PL_nextwhite = PL_thiswhite;
3049             PL_thiswhite = 0;
3050         }
3051         else if (PL_thisopen) {
3052             CURMAD('q', PL_thisopen);
3053             if (PL_thistoken)
3054                 sv_free(PL_thistoken);
3055             PL_thistoken = 0;
3056         }
3057         else {
3058             /* Store actual token text as madprop X */
3059             CURMAD('X', PL_thistoken);
3060         }
3061
3062         if (PL_thiswhite) {
3063             /* add preceding whitespace as madprop _ */
3064             CURMAD('_', PL_thiswhite);
3065         }
3066
3067         if (PL_thisstuff) {
3068             /* add quoted material as madprop = */
3069             CURMAD('=', PL_thisstuff);
3070         }
3071
3072         if (PL_thisclose) {
3073             /* add terminating quote as madprop Q */
3074             CURMAD('Q', PL_thisclose);
3075         }
3076     }
3077
3078     /* special processing based on optype */
3079
3080     switch (optype) {
3081
3082     /* opval doesn't need a TOKEN since it can already store mp */
3083     case WORD:
3084     case METHOD:
3085     case FUNCMETH:
3086     case THING:
3087     case PMFUNC:
3088     case PRIVATEREF:
3089     case FUNC0SUB:
3090     case UNIOPSUB:
3091     case LSTOPSUB:
3092         if (yylval.opval)
3093             append_madprops(PL_thismad, yylval.opval, 0);
3094         PL_thismad = 0;
3095         return optype;
3096
3097     /* fake EOF */
3098     case 0:
3099         optype = PEG;
3100         if (PL_endwhite) {
3101             addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
3102             PL_endwhite = 0;
3103         }
3104         break;
3105
3106     case ']':
3107     case '}':
3108         if (PL_faketokens)
3109             break;
3110         /* remember any fake bracket that lexer is about to discard */
3111         if (PL_lex_brackets == 1 &&
3112             ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
3113         {
3114             s = PL_bufptr;
3115             while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3116                 s++;
3117             if (*s == '}') {
3118                 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
3119                 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3120                 PL_thiswhite = 0;
3121                 PL_bufptr = s - 1;
3122                 break;  /* don't bother looking for trailing comment */
3123             }
3124             else
3125                 s = PL_bufptr;
3126         }
3127         if (optype == ']')
3128             break;
3129         /* FALLTHROUGH */
3130
3131     /* attach a trailing comment to its statement instead of next token */
3132     case ';':
3133         if (PL_faketokens)
3134             break;
3135         if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
3136             s = PL_bufptr;
3137             while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3138                 s++;
3139             if (*s == '\n' || *s == '#') {
3140                 while (s < PL_bufend && *s != '\n')
3141                     s++;
3142                 if (s < PL_bufend)
3143                     s++;
3144                 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
3145                 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3146                 PL_thiswhite = 0;
3147                 PL_bufptr = s;
3148             }
3149         }
3150         break;
3151
3152     /* pval */
3153     case LABEL:
3154         break;
3155
3156     /* ival */
3157     default:
3158         break;
3159
3160     }
3161
3162     /* Create new token struct.  Note: opvals return early above. */
3163     yylval.tkval = newTOKEN(optype, yylval, PL_thismad);
3164     PL_thismad = 0;
3165     return optype;
3166 }
3167 #endif
3168
3169 STATIC char *
3170 S_tokenize_use(pTHX_ int is_use, char *s) {
3171     dVAR;
3172     if (PL_expect != XSTATE)
3173         yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
3174                     is_use ? "use" : "no"));
3175     s = SKIPSPACE1(s);
3176     if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
3177         s = force_version(s, TRUE);
3178         if (*s == ';' || (s = SKIPSPACE1(s), *s == ';')) {
3179             start_force(PL_curforce);
3180             NEXTVAL_NEXTTOKE.opval = NULL;
3181             force_next(WORD);
3182         }
3183         else if (*s == 'v') {
3184             s = force_word(s,WORD,FALSE,TRUE,FALSE);
3185             s = force_version(s, FALSE);
3186         }
3187     }
3188     else {
3189         s = force_word(s,WORD,FALSE,TRUE,FALSE);
3190         s = force_version(s, FALSE);
3191     }
3192     yylval.ival = is_use;
3193     return s;
3194 }
3195 #ifdef DEBUGGING
3196     static const char* const exp_name[] =
3197         { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
3198           "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
3199         };
3200 #endif
3201
3202 /*
3203   yylex
3204
3205   Works out what to call the token just pulled out of the input
3206   stream.  The yacc parser takes care of taking the ops we return and
3207   stitching them into a tree.
3208
3209   Returns:
3210     PRIVATEREF
3211
3212   Structure:
3213       if read an identifier
3214           if we're in a my declaration
3215               croak if they tried to say my($foo::bar)
3216               build the ops for a my() declaration
3217           if it's an access to a my() variable
3218               are we in a sort block?
3219                   croak if my($a); $a <=> $b
3220               build ops for access to a my() variable
3221           if in a dq string, and they've said @foo and we can't find @foo
3222               croak
3223           build ops for a bareword
3224       if we already built the token before, use it.
3225 */
3226
3227
3228 #ifdef __SC__
3229 #pragma segment Perl_yylex
3230 #endif
3231 int
3232 Perl_yylex(pTHX)
3233 {
3234     dVAR;
3235     register char *s = PL_bufptr;
3236     register char *d;
3237     STRLEN len;
3238     bool bof = FALSE;
3239
3240     /* orig_keyword, gvp, and gv are initialized here because
3241      * jump to the label just_a_word_zero can bypass their
3242      * initialization later. */
3243     I32 orig_keyword = 0;
3244     GV *gv = NULL;
3245     GV **gvp = NULL;
3246
3247     DEBUG_T( {
3248         SV* tmp = newSVpvs("");
3249         PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
3250             (IV)CopLINE(PL_curcop),
3251             lex_state_names[PL_lex_state],
3252             exp_name[PL_expect],
3253             pv_display(tmp, s, strlen(s), 0, 60));
3254         SvREFCNT_dec(tmp);
3255     } );
3256     /* check if there's an identifier for us to look at */
3257     if (PL_pending_ident)
3258         return REPORT(S_pending_ident(aTHX));
3259
3260     /* no identifier pending identification */
3261
3262     switch (PL_lex_state) {
3263 #ifdef COMMENTARY
3264     case LEX_NORMAL:            /* Some compilers will produce faster */
3265     case LEX_INTERPNORMAL:      /* code if we comment these out. */
3266         break;
3267 #endif
3268
3269     /* when we've already built the next token, just pull it out of the queue */
3270     case LEX_KNOWNEXT:
3271 #ifdef PERL_MAD
3272         PL_lasttoke--;
3273         yylval = PL_nexttoke[PL_lasttoke].next_val;
3274         if (PL_madskills) {
3275             PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
3276             PL_nexttoke[PL_lasttoke].next_mad = 0;
3277             if (PL_thismad && PL_thismad->mad_key == '_') {
3278                 PL_thiswhite = (SV*)PL_thismad->mad_val;
3279                 PL_thismad->mad_val = 0;
3280                 mad_free(PL_thismad);
3281                 PL_thismad = 0;
3282             }
3283         }
3284         if (!PL_lasttoke) {
3285             PL_lex_state = PL_lex_defer;
3286             PL_expect = PL_lex_expect;
3287             PL_lex_defer = LEX_NORMAL;
3288             if (!PL_nexttoke[PL_lasttoke].next_type)
3289                 return yylex();
3290         }
3291 #else
3292         PL_nexttoke--;
3293         yylval = PL_nextval[PL_nexttoke];
3294         if (!PL_nexttoke) {
3295             PL_lex_state = PL_lex_defer;
3296             PL_expect = PL_lex_expect;
3297             PL_lex_defer = LEX_NORMAL;
3298         }
3299 #endif
3300 #ifdef PERL_MAD
3301         /* FIXME - can these be merged?  */
3302         return(PL_nexttoke[PL_lasttoke].next_type);
3303 #else
3304         return REPORT(PL_nexttype[PL_nexttoke]);
3305 #endif
3306
3307     /* interpolated case modifiers like \L \U, including \Q and \E.
3308        when we get here, PL_bufptr is at the \
3309     */
3310     case LEX_INTERPCASEMOD:
3311 #ifdef DEBUGGING
3312         if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
3313             Perl_croak(aTHX_ "panic: INTERPCASEMOD");
3314 #endif
3315         /* handle \E or end of string */
3316         if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
3317             /* if at a \E */
3318             if (PL_lex_casemods) {
3319                 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3320                 PL_lex_casestack[PL_lex_casemods] = '\0';
3321
3322                 if (PL_bufptr != PL_bufend
3323                     && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3324                     PL_bufptr += 2;
3325                     PL_lex_state = LEX_INTERPCONCAT;
3326 #ifdef PERL_MAD
3327                     if (PL_madskills)
3328                         PL_thistoken = newSVpvs("\\E");
3329 #endif
3330                 }
3331                 return REPORT(')');
3332             }
3333 #ifdef PERL_MAD
3334             while (PL_bufptr != PL_bufend &&
3335               PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
3336                 if (!PL_thiswhite)
3337                     PL_thiswhite = newSVpvs("");
3338                 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
3339                 PL_bufptr += 2;
3340             }
3341 #else
3342             if (PL_bufptr != PL_bufend)
3343                 PL_bufptr += 2;
3344 #endif
3345             PL_lex_state = LEX_INTERPCONCAT;
3346             return yylex();
3347         }
3348         else {
3349             DEBUG_T({ PerlIO_printf(Perl_debug_log,
3350               "### Saw case modifier\n"); });
3351             s = PL_bufptr + 1;
3352             if (s[1] == '\\' && s[2] == 'E') {
3353 #ifdef PERL_MAD
3354                 if (!PL_thiswhite)
3355                     PL_thiswhite = newSVpvs("");
3356                 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
3357 #endif
3358                 PL_bufptr = s + 3;
3359                 PL_lex_state = LEX_INTERPCONCAT;
3360                 return yylex();
3361             }
3362             else {
3363                 I32 tmp;
3364                 if (!PL_madskills) /* when just compiling don't need correct */
3365                     if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
3366                         tmp = *s, *s = s[2], s[2] = (char)tmp;  /* misordered... */
3367                 if ((*s == 'L' || *s == 'U') &&
3368                     (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
3369                     PL_lex_casestack[--PL_lex_casemods] = '\0';
3370                     return REPORT(')');
3371                 }
3372                 if (PL_lex_casemods > 10)
3373                     Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
3374                 PL_lex_casestack[PL_lex_casemods++] = *s;
3375                 PL_lex_casestack[PL_lex_casemods] = '\0';
3376                 PL_lex_state = LEX_INTERPCONCAT;
3377                 start_force(PL_curforce);
3378                 NEXTVAL_NEXTTOKE.ival = 0;
3379                 force_next('(');
3380                 start_force(PL_curforce);
3381                 if (*s == 'l')
3382                     NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
3383                 else if (*s == 'u')
3384                     NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
3385                 else if (*s == 'L')
3386                     NEXTVAL_NEXTTOKE.ival = OP_LC;
3387                 else if (*s == 'U')
3388                     NEXTVAL_NEXTTOKE.ival = OP_UC;
3389                 else if (*s == 'Q')
3390                     NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
3391                 else
3392                     Perl_croak(aTHX_ "panic: yylex");
3393                 if (PL_madskills) {
3394                     SV* const tmpsv = newSVpvs("\\ ");
3395                     /* replace the space with the character we want to escape
3396                      */
3397                     SvPVX(tmpsv)[1] = *s;
3398                     curmad('_', tmpsv);
3399                 }
3400                 PL_bufptr = s + 1;
3401             }
3402             force_next(FUNC);
3403             if (PL_lex_starts) {
3404                 s = PL_bufptr;
3405                 PL_lex_starts = 0;
3406 #ifdef PERL_MAD
3407                 if (PL_madskills) {
3408                     if (PL_thistoken)
3409                         sv_free(PL_thistoken);
3410                     PL_thistoken = newSVpvs("");
3411                 }
3412 #endif
3413                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3414                 if (PL_lex_casemods == 1 && PL_lex_inpat)
3415                     OPERATOR(',');
3416                 else
3417                     Aop(OP_CONCAT);
3418             }
3419             else
3420                 return yylex();
3421         }
3422
3423     case LEX_INTERPPUSH:
3424         return REPORT(sublex_push());
3425
3426     case LEX_INTERPSTART:
3427         if (PL_bufptr == PL_bufend)
3428             return REPORT(sublex_done());
3429         DEBUG_T({ PerlIO_printf(Perl_debug_log,
3430               "### Interpolated variable\n"); });
3431         PL_expect = XTERM;
3432         PL_lex_dojoin = (*PL_bufptr == '@');
3433         PL_lex_state = LEX_INTERPNORMAL;
3434         if (PL_lex_dojoin) {
3435             start_force(PL_curforce);
3436             NEXTVAL_NEXTTOKE.ival = 0;
3437             force_next(',');
3438             start_force(PL_curforce);
3439             force_ident("\"", '$');
3440             start_force(PL_curforce);
3441             NEXTVAL_NEXTTOKE.ival = 0;
3442             force_next('$');
3443             start_force(PL_curforce);
3444             NEXTVAL_NEXTTOKE.ival = 0;
3445             force_next('(');
3446             start_force(PL_curforce);
3447             NEXTVAL_NEXTTOKE.ival = OP_JOIN;    /* emulate join($", ...) */
3448             force_next(FUNC);
3449         }
3450         if (PL_lex_starts++) {
3451             s = PL_bufptr;
3452 #ifdef PERL_MAD
3453             if (PL_madskills) {
3454                 if (PL_thistoken)
3455                     sv_free(PL_thistoken);
3456                 PL_thistoken = newSVpvs("");
3457             }
3458 #endif
3459             /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3460             if (!PL_lex_casemods && PL_lex_inpat)
3461                 OPERATOR(',');
3462             else
3463                 Aop(OP_CONCAT);
3464         }
3465         return yylex();
3466
3467     case LEX_INTERPENDMAYBE:
3468         if (intuit_more(PL_bufptr)) {
3469             PL_lex_state = LEX_INTERPNORMAL;    /* false alarm, more expr */
3470             break;
3471         }
3472         /* FALL THROUGH */
3473
3474     case LEX_INTERPEND:
3475         if (PL_lex_dojoin) {
3476             PL_lex_dojoin = FALSE;
3477             PL_lex_state = LEX_INTERPCONCAT;
3478 #ifdef PERL_MAD
3479             if (PL_madskills) {
3480                 if (PL_thistoken)
3481                     sv_free(PL_thistoken);
3482                 PL_thistoken = newSVpvs("");
3483             }
3484 #endif
3485             return REPORT(')');
3486         }
3487         if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
3488             && SvEVALED(PL_lex_repl))
3489         {
3490             if (PL_bufptr != PL_bufend)
3491                 Perl_croak(aTHX_ "Bad evalled substitution pattern");
3492             PL_lex_repl = NULL;
3493         }
3494         /* FALLTHROUGH */
3495     case LEX_INTERPCONCAT:
3496 #ifdef DEBUGGING
3497         if (PL_lex_brackets)
3498             Perl_croak(aTHX_ "panic: INTERPCONCAT");
3499 #endif
3500         if (PL_bufptr == PL_bufend)
3501             return REPORT(sublex_done());
3502
3503         if (SvIVX(PL_linestr) == '\'') {
3504             SV *sv = newSVsv(PL_linestr);
3505             if (!PL_lex_inpat)
3506                 sv = tokeq(sv);
3507             else if ( PL_hints & HINT_NEW_RE )
3508                 sv = new_constant(NULL, 0, "qr", sv, sv, "q", 1);
3509             yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3510             s = PL_bufend;
3511         }
3512         else {
3513             s = scan_const(PL_bufptr);
3514             if (*s == '\\')
3515                 PL_lex_state = LEX_INTERPCASEMOD;
3516             else
3517                 PL_lex_state = LEX_INTERPSTART;
3518         }
3519
3520         if (s != PL_bufptr) {
3521             start_force(PL_curforce);
3522             if (PL_madskills) {
3523                 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
3524             }
3525             NEXTVAL_NEXTTOKE = yylval;
3526             PL_expect = XTERM;
3527             force_next(THING);
3528             if (PL_lex_starts++) {
3529 #ifdef PERL_MAD
3530                 if (PL_madskills) {
3531                     if (PL_thistoken)
3532                         sv_free(PL_thistoken);
3533                     PL_thistoken = newSVpvs("");
3534                 }
3535 #endif
3536                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3537                 if (!PL_lex_casemods && PL_lex_inpat)
3538                     OPERATOR(',');
3539                 else
3540                     Aop(OP_CONCAT);
3541             }
3542             else {
3543                 PL_bufptr = s;
3544                 return yylex();
3545             }
3546         }
3547
3548         return yylex();
3549     case LEX_FORMLINE:
3550         PL_lex_state = LEX_NORMAL;
3551         s = scan_formline(PL_bufptr);
3552         if (!PL_lex_formbrack)
3553             goto rightbracket;
3554         OPERATOR(';');
3555     }
3556
3557     s = PL_bufptr;
3558     PL_oldoldbufptr = PL_oldbufptr;
3559     PL_oldbufptr = s;
3560
3561   retry:
3562 #ifdef PERL_MAD
3563     if (PL_thistoken) {
3564         sv_free(PL_thistoken);
3565         PL_thistoken = 0;
3566     }
3567     PL_realtokenstart = s - SvPVX(PL_linestr);  /* assume but undo on ws */
3568 #endif
3569     switch (*s) {
3570     default:
3571         if (isIDFIRST_lazy_if(s,UTF))
3572             goto keylookup;
3573         len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
3574         Perl_croak(aTHX_ "Unrecognized character \\x%02X in column %d", *s & 255, (int) len + 1);
3575     case 4:
3576     case 26:
3577         goto fake_eof;                  /* emulate EOF on ^D or ^Z */
3578     case 0:
3579 #ifdef PERL_MAD
3580         if (PL_madskills)
3581             PL_faketokens = 0;
3582 #endif
3583         if (!PL_rsfp) {
3584             PL_last_uni = 0;
3585             PL_last_lop = 0;
3586             if (PL_lex_brackets) {
3587                 yyerror((const char *)
3588                         (PL_lex_formbrack
3589                          ? "Format not terminated"
3590                          : "Missing right curly or square bracket"));
3591             }
3592             DEBUG_T( { PerlIO_printf(Perl_debug_log,
3593                         "### Tokener got EOF\n");
3594             } );
3595             TOKEN(0);
3596         }
3597         if (s++ < PL_bufend)
3598             goto retry;                 /* ignore stray nulls */
3599         PL_last_uni = 0;
3600         PL_last_lop = 0;
3601         if (!PL_in_eval && !PL_preambled) {
3602             PL_preambled = TRUE;
3603 #ifdef PERL_MAD
3604             if (PL_madskills)
3605                 PL_faketokens = 1;
3606 #endif
3607             sv_setpv(PL_linestr,incl_perldb());
3608             if (SvCUR(PL_linestr))
3609                 sv_catpvs(PL_linestr,";");
3610             if (PL_preambleav){
3611                 while(AvFILLp(PL_preambleav) >= 0) {
3612                     SV *tmpsv = av_shift(PL_preambleav);
3613                     sv_catsv(PL_linestr, tmpsv);
3614                     sv_catpvs(PL_linestr, ";");
3615                     sv_free(tmpsv);
3616                 }
3617                 sv_free((SV*)PL_preambleav);
3618                 PL_preambleav = NULL;
3619             }
3620             if (PL_minus_n || PL_minus_p) {
3621                 sv_catpvs(PL_linestr, "LINE: while (<>) {");
3622                 if (PL_minus_l)
3623                     sv_catpvs(PL_linestr,"chomp;");
3624                 if (PL_minus_a) {
3625                     if (PL_minus_F) {
3626                         if ((*PL_splitstr == '/' || *PL_splitstr == '\''
3627                              || *PL_splitstr == '"')
3628                               && strchr(PL_splitstr + 1, *PL_splitstr))
3629                             Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
3630                         else {
3631                             /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
3632                                bytes can be used as quoting characters.  :-) */
3633                             const char *splits = PL_splitstr;
3634                             sv_catpvs(PL_linestr, "our @F=split(q\0");
3635                             do {
3636                                 /* Need to \ \s  */
3637                                 if (*splits == '\\')
3638                                     sv_catpvn(PL_linestr, splits, 1);
3639                                 sv_catpvn(PL_linestr, splits, 1);
3640                             } while (*splits++);
3641                             /* This loop will embed the trailing NUL of
3642                                PL_linestr as the last thing it does before
3643                                terminating.  */
3644                             sv_catpvs(PL_linestr, ");");
3645                         }
3646                     }
3647                     else
3648                         sv_catpvs(PL_linestr,"our @F=split(' ');");
3649                 }
3650             }
3651             if (PL_minus_E)
3652                 sv_catpvs(PL_linestr,"use feature ':5.10';");
3653             sv_catpvs(PL_linestr, "\n");
3654             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3655             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3656             PL_last_lop = PL_last_uni = NULL;
3657             if (PERLDB_LINE && PL_curstash != PL_debstash)
3658                 update_debugger_info(PL_linestr, NULL, 0);
3659             goto retry;
3660         }
3661         do {
3662             bof = PL_rsfp ? TRUE : FALSE;
3663             if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == NULL) {
3664               fake_eof:
3665 #ifdef PERL_MAD
3666                 PL_realtokenstart = -1;
3667 #endif
3668                 if (PL_rsfp) {
3669                     if (PL_preprocess && !PL_in_eval)
3670                         (void)PerlProc_pclose(PL_rsfp);
3671                     else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
3672                         PerlIO_clearerr(PL_rsfp);
3673                     else
3674                         (void)PerlIO_close(PL_rsfp);
3675                     PL_rsfp = NULL;
3676                     PL_doextract = FALSE;
3677                 }
3678                 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
3679 #ifdef PERL_MAD
3680                     if (PL_madskills)
3681                         PL_faketokens = 1;
3682 #endif
3683                     sv_setpv(PL_linestr,
3684                              (const char *)
3685                              (PL_minus_p
3686                               ? ";}continue{print;}" : ";}"));
3687                     PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3688                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3689                     PL_last_lop = PL_last_uni = NULL;
3690                     PL_minus_n = PL_minus_p = 0;
3691                     goto retry;
3692                 }
3693                 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3694                 PL_last_lop = PL_last_uni = NULL;
3695                 sv_setpvn(PL_linestr,"",0);
3696                 TOKEN(';');     /* not infinite loop because rsfp is NULL now */
3697             }
3698             /* If it looks like the start of a BOM or raw UTF-16,
3699              * check if it in fact is. */
3700             else if (bof &&
3701                      (*s == 0 ||
3702                       *(U8*)s == 0xEF ||
3703                       *(U8*)s >= 0xFE ||
3704                       s[1] == 0)) {
3705 #ifdef PERLIO_IS_STDIO
3706 #  ifdef __GNU_LIBRARY__
3707 #    if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
3708 #      define FTELL_FOR_PIPE_IS_BROKEN
3709 #    endif
3710 #  else
3711 #    ifdef __GLIBC__
3712 #      if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
3713 #        define FTELL_FOR_PIPE_IS_BROKEN
3714 #      endif
3715 #    endif
3716 #  endif
3717 #endif
3718 #ifdef FTELL_FOR_PIPE_IS_BROKEN
3719                 /* This loses the possibility to detect the bof
3720                  * situation on perl -P when the libc5 is being used.
3721                  * Workaround?  Maybe attach some extra state to PL_rsfp?
3722                  */
3723                 if (!PL_preprocess)
3724                     bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
3725 #else
3726                 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
3727 #endif
3728                 if (bof) {
3729                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3730                     s = swallow_bom((U8*)s);
3731                 }
3732             }
3733             if (PL_doextract) {
3734                 /* Incest with pod. */
3735 #ifdef PERL_MAD
3736                 if (PL_madskills)
3737                     sv_catsv(PL_thiswhite, PL_linestr);
3738 #endif
3739                 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
3740                     sv_setpvn(PL_linestr, "", 0);
3741                     PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3742                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3743                     PL_last_lop = PL_last_uni = NULL;
3744                     PL_doextract = FALSE;
3745                 }
3746             }
3747             incline(s);
3748         } while (PL_doextract);
3749         PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
3750         if (PERLDB_LINE && PL_curstash != PL_debstash)
3751             update_debugger_info(PL_linestr, NULL, 0);
3752         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3753         PL_last_lop = PL_last_uni = NULL;
3754         if (CopLINE(PL_curcop) == 1) {
3755             while (s < PL_bufend && isSPACE(*s))
3756                 s++;
3757             if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
3758                 s++;
3759 #ifdef PERL_MAD
3760             if (PL_madskills)
3761                 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
3762 #endif
3763             d = NULL;
3764             if (!PL_in_eval) {
3765                 if (*s == '#' && *(s+1) == '!')
3766                     d = s + 2;
3767 #ifdef ALTERNATE_SHEBANG
3768                 else {
3769                     static char const as[] = ALTERNATE_SHEBANG;
3770                     if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
3771                         d = s + (sizeof(as) - 1);
3772                 }
3773 #endif /* ALTERNATE_SHEBANG */
3774             }
3775             if (d) {
3776                 char *ipath;
3777                 char *ipathend;
3778
3779                 while (isSPACE(*d))
3780                     d++;
3781                 ipath = d;
3782                 while (*d && !isSPACE(*d))
3783                     d++;
3784                 ipathend = d;
3785
3786 #ifdef ARG_ZERO_IS_SCRIPT
3787                 if (ipathend > ipath) {
3788                     /*
3789                      * HP-UX (at least) sets argv[0] to the script name,
3790                      * which makes $^X incorrect.  And Digital UNIX and Linux,
3791                      * at least, set argv[0] to the basename of the Perl
3792                      * interpreter. So, having found "#!", we'll set it right.
3793                      */
3794                     SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
3795                                                     SVt_PV)); /* $^X */
3796                     assert(SvPOK(x) || SvGMAGICAL(x));
3797                     if (sv_eq(x, CopFILESV(PL_curcop))) {
3798                         sv_setpvn(x, ipath, ipathend - ipath);
3799                         SvSETMAGIC(x);
3800                     }
3801                     else {
3802                         STRLEN blen;
3803                         STRLEN llen;
3804                         const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
3805                         const char * const lstart = SvPV_const(x,llen);
3806                         if (llen < blen) {
3807                             bstart += blen - llen;
3808                             if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
3809                                 sv_setpvn(x, ipath, ipathend - ipath);
3810                                 SvSETMAGIC(x);
3811                             }
3812                         }
3813                     }
3814                     TAINT_NOT;  /* $^X is always tainted, but that's OK */
3815                 }
3816 #endif /* ARG_ZERO_IS_SCRIPT */
3817
3818                 /*
3819                  * Look for options.
3820                  */
3821                 d = instr(s,"perl -");
3822                 if (!d) {
3823                     d = instr(s,"perl");
3824 #if defined(DOSISH)
3825                     /* avoid getting into infinite loops when shebang
3826                      * line contains "Perl" rather than "perl" */
3827                     if (!d) {
3828                         for (d = ipathend-4; d >= ipath; --d) {
3829                             if ((*d == 'p' || *d == 'P')
3830                                 && !ibcmp(d, "perl", 4))
3831                             {
3832                                 break;
3833                             }
3834                         }
3835                         if (d < ipath)
3836                             d = NULL;
3837                     }
3838 #endif
3839                 }
3840 #ifdef ALTERNATE_SHEBANG
3841                 /*
3842                  * If the ALTERNATE_SHEBANG on this system starts with a
3843                  * character that can be part of a Perl expression, then if
3844                  * we see it but not "perl", we're probably looking at the
3845                  * start of Perl code, not a request to hand off to some
3846                  * other interpreter.  Similarly, if "perl" is there, but
3847                  * not in the first 'word' of the line, we assume the line
3848                  * contains the start of the Perl program.
3849                  */
3850                 if (d && *s != '#') {
3851                     const char *c = ipath;
3852                     while (*c && !strchr("; \t\r\n\f\v#", *c))
3853                         c++;
3854                     if (c < d)
3855                         d = NULL;       /* "perl" not in first word; ignore */
3856                     else
3857                         *s = '#';       /* Don't try to parse shebang line */
3858                 }
3859 #endif /* ALTERNATE_SHEBANG */
3860 #ifndef MACOS_TRADITIONAL
3861                 if (!d &&
3862                     *s == '#' &&
3863                     ipathend > ipath &&
3864                     !PL_minus_c &&
3865                     !instr(s,"indir") &&
3866                     instr(PL_origargv[0],"perl"))
3867                 {
3868                     dVAR;
3869                     char **newargv;
3870
3871                     *ipathend = '\0';
3872                     s = ipathend + 1;
3873                     while (s < PL_bufend && isSPACE(*s))
3874                         s++;
3875                     if (s < PL_bufend) {
3876                         Newxz(newargv,PL_origargc+3,char*);
3877                         newargv[1] = s;
3878                         while (s < PL_bufend && !isSPACE(*s))
3879                             s++;
3880                         *s = '\0';
3881                         Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
3882                     }
3883                     else
3884                         newargv = PL_origargv;
3885                     newargv[0] = ipath;
3886                     PERL_FPU_PRE_EXEC
3887                     PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
3888                     PERL_FPU_POST_EXEC
3889                     Perl_croak(aTHX_ "Can't exec %s", ipath);
3890                 }
3891 #endif
3892                 if (d) {
3893                     while (*d && !isSPACE(*d))
3894                         d++;
3895                     while (SPACE_OR_TAB(*d))
3896                         d++;
3897
3898                     if (*d++ == '-') {
3899                         const bool switches_done = PL_doswitches;
3900                         const U32 oldpdb = PL_perldb;
3901                         const bool oldn = PL_minus_n;
3902                         const bool oldp = PL_minus_p;
3903                         const char *d1 = d;
3904
3905                         do {
3906                             if (*d1 == 'M' || *d1 == 'm' || *d1 == 'C') {
3907                                 const char * const m = d1;
3908                                 while (*d1 && !isSPACE(*d1))
3909                                     d1++;
3910                                 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
3911                                       (int)(d1 - m), m);
3912                             }
3913                             d1 = moreswitches(d1);
3914                         } while (d1);
3915                         if (PL_doswitches && !switches_done) {
3916                             int argc = PL_origargc;
3917                             char **argv = PL_origargv;
3918                             do {
3919                                 argc--,argv++;
3920                             } while (argc && argv[0][0] == '-' && argv[0][1]);
3921                             init_argv_symbols(argc,argv);
3922                         }
3923                         if ((PERLDB_LINE && !oldpdb) ||
3924                             ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
3925                               /* if we have already added "LINE: while (<>) {",
3926                                  we must not do it again */
3927                         {
3928                             sv_setpvn(PL_linestr, "", 0);
3929                             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3930                             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3931                             PL_last_lop = PL_last_uni = NULL;
3932                             PL_preambled = FALSE;
3933                             if (PERLDB_LINE)
3934                                 (void)gv_fetchfile(PL_origfilename);
3935                             goto retry;
3936                         }
3937                     }
3938                 }
3939             }
3940         }
3941         if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3942             PL_bufptr = s;
3943             PL_lex_state = LEX_FORMLINE;
3944             return yylex();
3945         }
3946         goto retry;
3947     case '\r':
3948 #ifdef PERL_STRICT_CR
3949         Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
3950         Perl_croak(aTHX_
3951       "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
3952 #endif
3953     case ' ': case '\t': case '\f': case 013:
3954 #ifdef MACOS_TRADITIONAL
3955     case '\312':
3956 #endif
3957 #ifdef PERL_MAD
3958         PL_realtokenstart = -1;
3959         if (!PL_thiswhite)
3960             PL_thiswhite = newSVpvs("");
3961         sv_catpvn(PL_thiswhite, s, 1);
3962 #endif
3963         s++;
3964         goto retry;
3965     case '#':
3966     case '\n':
3967 #ifdef PERL_MAD
3968         PL_realtokenstart = -1;
3969         if (PL_madskills)
3970             PL_faketokens = 0;
3971 #endif
3972         if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
3973             if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
3974                 /* handle eval qq[#line 1 "foo"\n ...] */
3975                 CopLINE_dec(PL_curcop);
3976                 incline(s);
3977             }
3978             if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
3979                 s = SKIPSPACE0(s);
3980                 if (!PL_in_eval || PL_rsfp)
3981                     incline(s);
3982             }
3983             else {
3984                 d = s;
3985                 while (d < PL_bufend && *d != '\n')
3986                     d++;
3987                 if (d < PL_bufend)
3988                     d++;
3989                 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
3990                   Perl_croak(aTHX_ "panic: input overflow");
3991 #ifdef PERL_MAD
3992                 if (PL_madskills)
3993                     PL_thiswhite = newSVpvn(s, d - s);
3994 #endif
3995                 s = d;
3996                 incline(s);
3997             }
3998             if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3999                 PL_bufptr = s;
4000                 PL_lex_state = LEX_FORMLINE;
4001                 return yylex();
4002             }
4003         }
4004         else {
4005 #ifdef PERL_MAD
4006             if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
4007                 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
4008                     PL_faketokens = 0;
4009                     s = SKIPSPACE0(s);
4010                     TOKEN(PEG); /* make sure any #! line is accessible */
4011                 }
4012                 s = SKIPSPACE0(s);
4013             }
4014             else {
4015 /*              if (PL_madskills && PL_lex_formbrack) { */
4016                     d = s;
4017                     while (d < PL_bufend && *d != '\n')
4018                         d++;
4019                     if (d < PL_bufend)
4020                         d++;
4021                     else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
4022                       Perl_croak(aTHX_ "panic: input overflow");
4023                     if (PL_madskills && CopLINE(PL_curcop) >= 1) {
4024                         if (!PL_thiswhite)
4025                             PL_thiswhite = newSVpvs("");
4026                         if (CopLINE(PL_curcop) == 1) {
4027                             sv_setpvn(PL_thiswhite, "", 0);
4028                             PL_faketokens = 0;
4029                         }
4030                         sv_catpvn(PL_thiswhite, s, d - s);
4031                     }
4032                     s = d;
4033 /*              }
4034                 *s = '\0';
4035                 PL_bufend = s; */
4036             }
4037 #else
4038             *s = '\0';
4039             PL_bufend = s;
4040 #endif
4041         }
4042         goto retry;
4043     case '-':
4044         if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
4045             I32 ftst = 0;
4046             char tmp;
4047
4048             s++;
4049             PL_bufptr = s;
4050             tmp = *s++;
4051
4052             while (s < PL_bufend && SPACE_OR_TAB(*s))
4053                 s++;
4054
4055             if (strnEQ(s,"=>",2)) {
4056                 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
4057                 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
4058                 OPERATOR('-');          /* unary minus */
4059             }
4060             PL_last_uni = PL_oldbufptr;
4061             switch (tmp) {
4062             case 'r': ftst = OP_FTEREAD;        break;
4063             case 'w': ftst = OP_FTEWRITE;       break;
4064             case 'x': ftst = OP_FTEEXEC;        break;
4065             case 'o': ftst = OP_FTEOWNED;       break;
4066             case 'R': ftst = OP_FTRREAD;        break;
4067             case 'W': ftst = OP_FTRWRITE;       break;
4068             case 'X': ftst = OP_FTREXEC;        break;
4069             case 'O': ftst = OP_FTROWNED;       break;
4070             case 'e': ftst = OP_FTIS;           break;
4071             case 'z': ftst = OP_FTZERO;         break;
4072             case 's': ftst = OP_FTSIZE;         break;
4073             case 'f': ftst = OP_FTFILE;         break;
4074             case 'd': ftst = OP_FTDIR;          break;
4075             case 'l': ftst = OP_FTLINK;         break;
4076             case 'p': ftst = OP_FTPIPE;         break;
4077             case 'S': ftst = OP_FTSOCK;         break;
4078             case 'u': ftst = OP_FTSUID;         break;
4079             case 'g': ftst = OP_FTSGID;         break;
4080             case 'k': ftst = OP_FTSVTX;         break;
4081             case 'b': ftst = OP_FTBLK;          break;
4082             case 'c': ftst = OP_FTCHR;          break;
4083             case 't': ftst = OP_FTTTY;          break;
4084             case 'T': ftst = OP_FTTEXT;         break;
4085             case 'B': ftst = OP_FTBINARY;       break;
4086             case 'M': case 'A': case 'C':
4087                 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
4088                 switch (tmp) {
4089                 case 'M': ftst = OP_FTMTIME;    break;
4090                 case 'A': ftst = OP_FTATIME;    break;
4091                 case 'C': ftst = OP_FTCTIME;    break;
4092                 default:                        break;
4093                 }
4094                 break;
4095             default:
4096                 break;
4097             }
4098             if (ftst) {
4099                 PL_last_lop_op = (OPCODE)ftst;
4100                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
4101                         "### Saw file test %c\n", (int)tmp);
4102                 } );
4103                 FTST(ftst);
4104             }
4105             else {
4106                 /* Assume it was a minus followed by a one-letter named
4107                  * subroutine call (or a -bareword), then. */
4108                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
4109                         "### '-%c' looked like a file test but was not\n",
4110                         (int) tmp);
4111                 } );
4112                 s = --PL_bufptr;
4113             }
4114         }
4115         {
4116             const char tmp = *s++;
4117             if (*s == tmp) {
4118                 s++;
4119                 if (PL_expect == XOPERATOR)
4120                     TERM(POSTDEC);
4121                 else
4122                     OPERATOR(PREDEC);
4123             }
4124             else if (*s == '>') {
4125                 s++;
4126                 s = SKIPSPACE1(s);
4127                 if (isIDFIRST_lazy_if(s,UTF)) {
4128                     s = force_word(s,METHOD,FALSE,TRUE,FALSE);
4129                     TOKEN(ARROW);
4130                 }
4131                 else if (*s == '$')
4132                     OPERATOR(ARROW);
4133                 else
4134                     TERM(ARROW);
4135             }
4136             if (PL_expect == XOPERATOR)
4137                 Aop(OP_SUBTRACT);
4138             else {
4139                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
4140                     check_uni();
4141                 OPERATOR('-');          /* unary minus */
4142             }
4143         }
4144
4145     case '+':
4146         {
4147             const char tmp = *s++;
4148             if (*s == tmp) {
4149                 s++;
4150                 if (PL_expect == XOPERATOR)
4151                     TERM(POSTINC);
4152                 else
4153                     OPERATOR(PREINC);
4154             }
4155             if (PL_expect == XOPERATOR)
4156                 Aop(OP_ADD);
4157             else {
4158                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
4159                     check_uni();
4160                 OPERATOR('+');
4161             }
4162         }
4163
4164     case '*':
4165         if (PL_expect != XOPERATOR) {
4166             s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
4167             PL_expect = XOPERATOR;
4168             force_ident(PL_tokenbuf, '*');
4169             if (!*PL_tokenbuf)
4170                 PREREF('*');
4171             TERM('*');
4172         }
4173         s++;
4174         if (*s == '*') {
4175             s++;
4176             PWop(OP_POW);
4177         }
4178         Mop(OP_MULTIPLY);
4179
4180     case '%':
4181         if (PL_expect == XOPERATOR) {
4182             ++s;
4183             Mop(OP_MODULO);
4184         }
4185         PL_tokenbuf[0] = '%';
4186         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
4187                 sizeof PL_tokenbuf - 1, FALSE);
4188         if (!PL_tokenbuf[1]) {
4189             PREREF('%');
4190         }
4191         PL_pending_ident = '%';
4192         TERM('%');
4193
4194     case '^':
4195         s++;
4196         BOop(OP_BIT_XOR);
4197     case '[':
4198         PL_lex_brackets++;
4199         /* FALL THROUGH */
4200     case '~':
4201         if (s[1] == '~'
4202             && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
4203         {
4204             s += 2;
4205             Eop(OP_SMARTMATCH);
4206         }
4207     case ',':
4208         {
4209             const char tmp = *s++;
4210             OPERATOR(tmp);
4211         }
4212     case ':':
4213         if (s[1] == ':') {
4214             len = 0;
4215             goto just_a_word_zero_gv;
4216         }
4217         s++;
4218         switch (PL_expect) {
4219             OP *attrs;
4220 #ifdef PERL_MAD
4221             I32 stuffstart;
4222 #endif
4223         case XOPERATOR:
4224             if (!PL_in_my || PL_lex_state != LEX_NORMAL)
4225                 break;
4226             PL_bufptr = s;      /* update in case we back off */
4227             goto grabattrs;
4228         case XATTRBLOCK:
4229             PL_expect = XBLOCK;
4230             goto grabattrs;
4231         case XATTRTERM:
4232             PL_expect = XTERMBLOCK;
4233          grabattrs:
4234 #ifdef PERL_MAD
4235             stuffstart = s - SvPVX(PL_linestr) - 1;
4236 #endif
4237             s = PEEKSPACE(s);
4238             attrs = NULL;
4239             while (isIDFIRST_lazy_if(s,UTF)) {
4240                 I32 tmp;
4241                 SV *sv;
4242                 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
4243                 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
4244                     if (tmp < 0) tmp = -tmp;
4245                     switch (tmp) {
4246                     case KEY_or:
4247                     case KEY_and:
4248                     case KEY_for:
4249                     case KEY_unless:
4250                     case KEY_if:
4251                     case KEY_while:
4252                     case KEY_until:
4253                         goto got_attrs;
4254                     default:
4255                         break;
4256                     }
4257                 }
4258                 sv = newSVpvn(s, len);
4259                 if (*d == '(') {
4260                     d = scan_str(d,TRUE,TRUE);
4261                     if (!d) {
4262                         /* MUST advance bufptr here to avoid bogus
4263                            "at end of line" context messages from yyerror().
4264                          */
4265                         PL_bufptr = s + len;
4266                         yyerror("Unterminated attribute parameter in attribute list");
4267                         if (attrs)
4268                             op_free(attrs);
4269                         sv_free(sv);
4270                         return REPORT(0);       /* EOF indicator */
4271                     }
4272                 }
4273                 if (PL_lex_stuff) {
4274                     sv_catsv(sv, PL_lex_stuff);
4275                     attrs = append_elem(OP_LIST, attrs,
4276                                         newSVOP(OP_CONST, 0, sv));
4277                     SvREFCNT_dec(PL_lex_stuff);
4278                     PL_lex_stuff = NULL;
4279                 }
4280                 else {
4281                     if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
4282                         sv_free(sv);
4283                         if (PL_in_my == KEY_our) {
4284 #ifdef USE_ITHREADS
4285                             GvUNIQUE_on(cGVOPx_gv(yylval.opval));
4286 #else
4287                             /* skip to avoid loading attributes.pm */
4288 #endif
4289                             deprecate(":unique");
4290                         }
4291                         else
4292                             Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
4293                     }
4294
4295                     /* NOTE: any CV attrs applied here need to be part of
4296                        the CVf_BUILTIN_ATTRS define in cv.h! */
4297                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
4298                         sv_free(sv);
4299                         CvLVALUE_on(PL_compcv);
4300                     }
4301                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
4302                         sv_free(sv);
4303                         CvLOCKED_on(PL_compcv);
4304                     }
4305                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
4306                         sv_free(sv);
4307                         CvMETHOD_on(PL_compcv);
4308                     }
4309                     /* After we've set the flags, it could be argued that
4310                        we don't need to do the attributes.pm-based setting
4311                        process, and shouldn't bother appending recognized
4312                        flags.  To experiment with that, uncomment the
4313                        following "else".  (Note that's already been
4314                        uncommented.  That keeps the above-applied built-in
4315                        attributes from being intercepted (and possibly
4316                        rejected) by a package's attribute routines, but is
4317                        justified by the performance win for the common case
4318                        of applying only built-in attributes.) */
4319                     else
4320                         attrs = append_elem(OP_LIST, attrs,
4321                                             newSVOP(OP_CONST, 0,
4322                                                     sv));
4323                 }
4324                 s = PEEKSPACE(d);
4325                 if (*s == ':' && s[1] != ':')
4326                     s = PEEKSPACE(s+1);
4327                 else if (s == d)
4328                     break;      /* require real whitespace or :'s */
4329                 /* XXX losing whitespace on sequential attributes here */
4330             }
4331             {
4332                 const char tmp
4333                     = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
4334                 if (*s != ';' && *s != '}' && *s != tmp
4335                     && (tmp != '=' || *s != ')')) {
4336                     const char q = ((*s == '\'') ? '"' : '\'');
4337                     /* If here for an expression, and parsed no attrs, back
4338                        off. */
4339                     if (tmp == '=' && !attrs) {
4340                         s = PL_bufptr;
4341                         break;
4342                     }
4343                     /* MUST advance bufptr here to avoid bogus "at end of line"
4344                        context messages from yyerror().
4345                     */
4346                     PL_bufptr = s;
4347                     yyerror( (const char *)
4348                              (*s
4349                               ? Perl_form(aTHX_ "Invalid separator character "
4350                                           "%c%c%c in attribute list", q, *s, q)
4351                               : "Unterminated attribute list" ) );
4352                     if (attrs)
4353                         op_free(attrs);
4354                     OPERATOR(':');
4355                 }
4356             }
4357         got_attrs:
4358             if (attrs) {
4359                 start_force(PL_curforce);
4360                 NEXTVAL_NEXTTOKE.opval = attrs;
4361                 CURMAD('_', PL_nextwhite);
4362                 force_next(THING);
4363             }
4364 #ifdef PERL_MAD
4365             if (PL_madskills) {
4366                 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
4367                                      (s - SvPVX(PL_linestr)) - stuffstart);
4368             }
4369 #endif
4370             TOKEN(COLONATTR);
4371         }
4372         OPERATOR(':');
4373     case '(':
4374         s++;
4375         if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
4376             PL_oldbufptr = PL_oldoldbufptr;             /* allow print(STDOUT 123) */
4377         else
4378             PL_expect = XTERM;
4379         s = SKIPSPACE1(s);
4380         TOKEN('(');
4381     case ';':
4382         CLINE;
4383         {
4384             const char tmp = *s++;
4385             OPERATOR(tmp);
4386         }
4387     case ')':
4388         {
4389             const char tmp = *s++;
4390             s = SKIPSPACE1(s);
4391             if (*s == '{')
4392                 PREBLOCK(tmp);
4393             TERM(tmp);
4394         }
4395     case ']':
4396         s++;
4397         if (PL_lex_brackets <= 0)
4398             yyerror("Unmatched right square bracket");
4399         else
4400             --PL_lex_brackets;
4401         if (PL_lex_state == LEX_INTERPNORMAL) {
4402             if (PL_lex_brackets == 0) {
4403                 if (*s == '-' && s[1] == '>')
4404                     PL_lex_state = LEX_INTERPENDMAYBE;
4405                 else if (*s != '[' && *s != '{')
4406                     PL_lex_state = LEX_INTERPEND;
4407             }
4408         }
4409         TERM(']');
4410     case '{':
4411       leftbracket:
4412         s++;
4413         if (PL_lex_brackets > 100) {
4414             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4415         }
4416         switch (PL_expect) {
4417         case XTERM:
4418             if (PL_lex_formbrack) {
4419                 s--;
4420                 PRETERMBLOCK(DO);
4421             }
4422             if (PL_oldoldbufptr == PL_last_lop)
4423                 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
4424             else
4425                 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
4426             OPERATOR(HASHBRACK);
4427         case XOPERATOR:
4428             while (s < PL_bufend && SPACE_OR_TAB(*s))
4429                 s++;
4430             d = s;
4431             PL_tokenbuf[0] = '\0';
4432             if (d < PL_bufend && *d == '-') {
4433                 PL_tokenbuf[0] = '-';
4434                 d++;
4435                 while (d < PL_bufend && SPACE_OR_TAB(*d))
4436                     d++;
4437             }
4438             if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
4439                 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
4440                               FALSE, &len);
4441                 while (d < PL_bufend && SPACE_OR_TAB(*d))
4442                     d++;
4443                 if (*d == '}') {
4444                     const char minus = (PL_tokenbuf[0] == '-');
4445                     s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
4446                     if (minus)
4447                         force_next('-');
4448                 }
4449             }
4450             /* FALL THROUGH */
4451         case XATTRBLOCK:
4452         case XBLOCK:
4453             PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
4454             PL_expect = XSTATE;
4455             break;
4456         case XATTRTERM:
4457         case XTERMBLOCK:
4458             PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
4459             PL_expect = XSTATE;
4460             break;
4461         default: {
4462                 const char *t;
4463                 if (PL_oldoldbufptr == PL_last_lop)
4464                     PL_lex_brackstack[PL_lex_brackets++] = XTERM;
4465                 else
4466                     PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
4467                 s = SKIPSPACE1(s);
4468                 if (*s == '}') {
4469                     if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
4470                         PL_expect = XTERM;
4471                         /* This hack is to get the ${} in the message. */
4472                         PL_bufptr = s+1;
4473                         yyerror("syntax error");
4474                         break;
4475                     }
4476                     OPERATOR(HASHBRACK);
4477                 }
4478                 /* This hack serves to disambiguate a pair of curlies
4479                  * as being a block or an anon hash.  Normally, expectation
4480                  * determines that, but in cases where we're not in a
4481                  * position to expect anything in particular (like inside
4482                  * eval"") we have to resolve the ambiguity.  This code
4483                  * covers the case where the first term in the curlies is a
4484                  * quoted string.  Most other cases need to be explicitly
4485                  * disambiguated by prepending a "+" before the opening
4486                  * curly in order to force resolution as an anon hash.
4487                  *
4488                  * XXX should probably propagate the outer expectation
4489                  * into eval"" to rely less on this hack, but that could
4490                  * potentially break current behavior of eval"".
4491                  * GSAR 97-07-21
4492                  */
4493                 t = s;
4494                 if (*s == '\'' || *s == '"' || *s == '`') {
4495                     /* common case: get past first string, handling escapes */
4496                     for (t++; t < PL_bufend && *t != *s;)
4497                         if (*t++ == '\\' && (*t == '\\' || *t == *s))
4498                             t++;
4499                     t++;
4500                 }
4501                 else if (*s == 'q') {
4502                     if (++t < PL_bufend
4503                         && (!isALNUM(*t)
4504                             || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
4505                                 && !isALNUM(*t))))
4506                     {
4507                         /* skip q//-like construct */
4508                         const char *tmps;
4509                         char open, close, term;
4510                         I32 brackets = 1;
4511
4512                         while (t < PL_bufend && isSPACE(*t))
4513                             t++;
4514                         /* check for q => */
4515                         if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
4516                             OPERATOR(HASHBRACK);
4517                         }
4518                         term = *t;
4519                         open = term;
4520                         if (term && (tmps = strchr("([{< )]}> )]}>",term)))
4521                             term = tmps[5];
4522                         close = term;
4523                         if (open == close)
4524                             for (t++; t < PL_bufend; t++) {
4525                                 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
4526                                     t++;
4527                                 else if (*t == open)
4528                                     break;
4529                             }
4530                         else {
4531                             for (t++; t < PL_bufend; t++) {
4532                                 if (*t == '\\' && t+1 < PL_bufend)
4533                                     t++;
4534                                 else if (*t == close && --brackets <= 0)
4535                                     break;
4536                                 else if (*t == open)
4537                                     brackets++;
4538                             }
4539                         }
4540                         t++;
4541                     }
4542                     else
4543                         /* skip plain q word */
4544                         while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
4545                              t += UTF8SKIP(t);
4546                 }
4547                 else if (isALNUM_lazy_if(t,UTF)) {
4548                     t += UTF8SKIP(t);
4549                     while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
4550                          t += UTF8SKIP(t);
4551                 }
4552                 while (t < PL_bufend && isSPACE(*t))
4553                     t++;
4554                 /* if comma follows first term, call it an anon hash */
4555                 /* XXX it could be a comma expression with loop modifiers */
4556                 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
4557                                    || (*t == '=' && t[1] == '>')))
4558                     OPERATOR(HASHBRACK);
4559                 if (PL_expect == XREF)
4560                     PL_expect = XTERM;
4561                 else {
4562                     PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
4563                     PL_expect = XSTATE;
4564                 }
4565             }
4566             break;
4567         }
4568         yylval.ival = CopLINE(PL_curcop);
4569         if (isSPACE(*s) || *s == '#')
4570             PL_copline = NOLINE;   /* invalidate current command line number */
4571         TOKEN('{');
4572     case '}':
4573       rightbracket:
4574         s++;
4575         if (PL_lex_brackets <= 0)
4576             yyerror("Unmatched right curly bracket");
4577         else
4578             PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
4579         if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
4580             PL_lex_formbrack = 0;
4581         if (PL_lex_state == LEX_INTERPNORMAL) {
4582             if (PL_lex_brackets == 0) {
4583                 if (PL_expect & XFAKEBRACK) {
4584                     PL_expect &= XENUMMASK;
4585                     PL_lex_state = LEX_INTERPEND;
4586                     PL_bufptr = s;
4587 #if 0
4588                     if (PL_madskills) {
4589                         if (!PL_thiswhite)
4590                             PL_thiswhite = newSVpvs("");
4591                         sv_catpvn(PL_thiswhite,"}",1);
4592                     }
4593 #endif
4594                     return yylex();     /* ignore fake brackets */
4595                 }
4596                 if (*s == '-' && s[1] == '>')
4597                     PL_lex_state = LEX_INTERPENDMAYBE;
4598                 else if (*s != '[' && *s != '{')
4599                     PL_lex_state = LEX_INTERPEND;
4600             }
4601         }
4602         if (PL_expect & XFAKEBRACK) {
4603             PL_expect &= XENUMMASK;
4604             PL_bufptr = s;
4605             return yylex();             /* ignore fake brackets */
4606         }
4607         start_force(PL_curforce);
4608         if (PL_madskills) {
4609             curmad('X', newSVpvn(s-1,1));
4610             CURMAD('_', PL_thiswhite);
4611         }
4612         force_next('}');
4613 #ifdef PERL_MAD
4614         if (!PL_thistoken)
4615             PL_thistoken = newSVpvs("");
4616 #endif
4617         TOKEN(';');
4618     case '&':
4619         s++;
4620         if (*s++ == '&')
4621             AOPERATOR(ANDAND);
4622         s--;
4623         if (PL_expect == XOPERATOR) {
4624             if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
4625                 && isIDFIRST_lazy_if(s,UTF))
4626             {
4627                 CopLINE_dec(PL_curcop);
4628                 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
4629                 CopLINE_inc(PL_curcop);
4630             }
4631             BAop(OP_BIT_AND);
4632         }
4633
4634         s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
4635         if (*PL_tokenbuf) {
4636             PL_expect = XOPERATOR;
4637             force_ident(PL_tokenbuf, '&');
4638         }
4639         else
4640             PREREF('&');
4641         yylval.ival = (OPpENTERSUB_AMPER<<8);
4642         TERM('&');
4643
4644     case '|':
4645         s++;
4646         if (*s++ == '|')
4647             AOPERATOR(OROR);
4648         s--;
4649         BOop(OP_BIT_OR);
4650     case '=':
4651         s++;
4652         {
4653             const char tmp = *s++;
4654             if (tmp == '=')
4655                 Eop(OP_EQ);
4656             if (tmp == '>')
4657                 OPERATOR(',');
4658             if (tmp == '~')
4659                 PMop(OP_MATCH);
4660             if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
4661                 && strchr("+-*/%.^&|<",tmp))
4662                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4663                             "Reversed %c= operator",(int)tmp);
4664             s--;
4665             if (PL_expect == XSTATE && isALPHA(tmp) &&
4666                 (s == PL_linestart+1 || s[-2] == '\n') )
4667                 {
4668                     if (PL_in_eval && !PL_rsfp) {
4669                         d = PL_bufend;
4670                         while (s < d) {
4671                             if (*s++ == '\n') {
4672                                 incline(s);
4673                                 if (strnEQ(s,"=cut",4)) {
4674                                     s = strchr(s,'\n');
4675                                     if (s)
4676                                         s++;
4677                                     else
4678                                         s = d;
4679                                     incline(s);
4680                                     goto retry;
4681                                 }
4682                             }
4683                         }
4684                         goto retry;
4685                     }
4686 #ifdef PERL_MAD
4687                     if (PL_madskills) {
4688                         if (!PL_thiswhite)
4689                             PL_thiswhite = newSVpvs("");
4690                         sv_catpvn(PL_thiswhite, PL_linestart,
4691                                   PL_bufend - PL_linestart);
4692                     }
4693 #endif
4694                     s = PL_bufend;
4695                     PL_doextract = TRUE;
4696                     goto retry;
4697                 }
4698         }
4699         if (PL_lex_brackets < PL_lex_formbrack) {
4700             const char *t = s;
4701 #ifdef PERL_STRICT_CR
4702             while (SPACE_OR_TAB(*t))
4703 #else
4704             while (SPACE_OR_TAB(*t) || *t == '\r')
4705 #endif
4706                 t++;
4707             if (*t == '\n' || *t == '#') {
4708                 s--;
4709                 PL_expect = XBLOCK;
4710                 goto leftbracket;
4711             }
4712         }
4713         yylval.ival = 0;
4714         OPERATOR(ASSIGNOP);
4715     case '!':
4716         s++;
4717         {
4718             const char tmp = *s++;
4719             if (tmp == '=') {
4720                 /* was this !=~ where !~ was meant?
4721                  * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
4722
4723                 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
4724                     const char *t = s+1;
4725
4726                     while (t < PL_bufend && isSPACE(*t))
4727                         ++t;
4728
4729                     if (*t == '/' || *t == '?' ||
4730                         ((*t == 'm' || *t == 's' || *t == 'y')
4731                          && !isALNUM(t[1])) ||
4732                         (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
4733                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4734                                     "!=~ should be !~");
4735                 }
4736                 Eop(OP_NE);
4737             }
4738             if (tmp == '~')
4739                 PMop(OP_NOT);
4740         }
4741         s--;
4742         OPERATOR('!');
4743     case '<':
4744         if (PL_expect != XOPERATOR) {
4745             if (s[1] != '<' && !strchr(s,'>'))
4746                 check_uni();
4747             if (s[1] == '<')
4748                 s = scan_heredoc(s);
4749             else
4750                 s = scan_inputsymbol(s);
4751             TERM(sublex_start());
4752         }
4753         s++;
4754         {
4755             char tmp = *s++;
4756             if (tmp == '<')
4757                 SHop(OP_LEFT_SHIFT);
4758             if (tmp == '=') {
4759                 tmp = *s++;
4760                 if (tmp == '>')
4761                     Eop(OP_NCMP);
4762                 s--;
4763                 Rop(OP_LE);
4764             }
4765         }
4766         s--;
4767         Rop(OP_LT);
4768     case '>':
4769         s++;
4770         {
4771             const char tmp = *s++;
4772             if (tmp == '>')
4773                 SHop(OP_RIGHT_SHIFT);
4774             else if (tmp == '=')
4775                 Rop(OP_GE);
4776         }
4777         s--;
4778         Rop(OP_GT);
4779
4780     case '$':
4781         CLINE;
4782
4783         if (PL_expect == XOPERATOR) {
4784             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
4785                 PL_expect = XTERM;
4786                 deprecate_old(commaless_variable_list);
4787                 return REPORT(','); /* grandfather non-comma-format format */
4788             }
4789         }
4790
4791         if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
4792             PL_tokenbuf[0] = '@';
4793             s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
4794                            sizeof PL_tokenbuf - 1, FALSE);
4795             if (PL_expect == XOPERATOR)
4796                 no_op("Array length", s);
4797             if (!PL_tokenbuf[1])
4798                 PREREF(DOLSHARP);
4799             PL_expect = XOPERATOR;
4800             PL_pending_ident = '#';
4801             TOKEN(DOLSHARP);
4802         }
4803
4804         PL_tokenbuf[0] = '$';
4805         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
4806                        sizeof PL_tokenbuf - 1, FALSE);
4807         if (PL_expect == XOPERATOR)
4808             no_op("Scalar", s);
4809         if (!PL_tokenbuf[1]) {
4810             if (s == PL_bufend)
4811                 yyerror("Final $ should be \\$ or $name");
4812             PREREF('$');
4813         }
4814
4815         /* This kludge not intended to be bulletproof. */
4816         if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
4817             yylval.opval = newSVOP(OP_CONST, 0,
4818                                    newSViv(CopARYBASE_get(&PL_compiling)));
4819             yylval.opval->op_private = OPpCONST_ARYBASE;
4820             TERM(THING);
4821         }
4822
4823         d = s;
4824         {
4825             const char tmp = *s;
4826             if (PL_lex_state == LEX_NORMAL)
4827                 s = SKIPSPACE1(s);
4828
4829             if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
4830                 && intuit_more(s)) {
4831                 if (*s == '[') {
4832                     PL_tokenbuf[0] = '@';
4833                     if (ckWARN(WARN_SYNTAX)) {
4834                         char *t = s+1;
4835
4836                         while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
4837                             t++;
4838                         if (*t++ == ',') {
4839                             PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
4840                             while (t < PL_bufend && *t != ']')
4841                                 t++;
4842                             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4843                                         "Multidimensional syntax %.*s not supported",
4844                                     (int)((t - PL_bufptr) + 1), PL_bufptr);
4845                         }
4846                     }
4847                 }
4848                 else if (*s == '{') {
4849                     char *t;
4850                     PL_tokenbuf[0] = '%';
4851                     if (strEQ(PL_tokenbuf+1, "SIG")  && ckWARN(WARN_SYNTAX)
4852                         && (t = strchr(s, '}')) && (t = strchr(t, '=')))
4853                         {
4854                             char tmpbuf[sizeof PL_tokenbuf];
4855                             do {
4856                                 t++;
4857                             } while (isSPACE(*t));
4858                             if (isIDFIRST_lazy_if(t,UTF)) {
4859                                 STRLEN len;
4860                                 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
4861                                               &len);
4862                                 while (isSPACE(*t))
4863                                     t++;
4864                                 if (*t == ';' && get_cvn_flags(tmpbuf, len, 0))
4865                                     Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4866                                                 "You need to quote \"%s\"",
4867                                                 tmpbuf);
4868                             }
4869                         }
4870                 }
4871             }
4872
4873             PL_expect = XOPERATOR;
4874             if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
4875                 const bool islop = (PL_last_lop == PL_oldoldbufptr);
4876                 if (!islop || PL_last_lop_op == OP_GREPSTART)
4877                     PL_expect = XOPERATOR;
4878                 else if (strchr("$@\"'`q", *s))
4879                     PL_expect = XTERM;          /* e.g. print $fh "foo" */
4880                 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
4881                     PL_expect = XTERM;          /* e.g. print $fh &sub */
4882                 else if (isIDFIRST_lazy_if(s,UTF)) {
4883                     char tmpbuf[sizeof PL_tokenbuf];
4884                     int t2;
4885                     scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
4886                     if ((t2 = keyword(tmpbuf, len, 0))) {
4887                         /* binary operators exclude handle interpretations */
4888                         switch (t2) {
4889                         case -KEY_x:
4890                         case -KEY_eq:
4891                         case -KEY_ne:
4892                         case -KEY_gt:
4893                         case -KEY_lt:
4894                         case -KEY_ge:
4895                         case -KEY_le:
4896                         case -KEY_cmp:
4897                             break;
4898                         default:
4899                             PL_expect = XTERM;  /* e.g. print $fh length() */
4900                             break;
4901                         }
4902                     }
4903                     else {
4904                         PL_expect = XTERM;      /* e.g. print $fh subr() */
4905                     }
4906                 }
4907                 else if (isDIGIT(*s))
4908                     PL_expect = XTERM;          /* e.g. print $fh 3 */
4909                 else if (*s == '.' && isDIGIT(s[1]))
4910                     PL_expect = XTERM;          /* e.g. print $fh .3 */
4911                 else if ((*s == '?' || *s == '-' || *s == '+')
4912                          && !isSPACE(s[1]) && s[1] != '=')
4913                     PL_expect = XTERM;          /* e.g. print $fh -1 */
4914                 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
4915                          && s[1] != '/')
4916                     PL_expect = XTERM;          /* e.g. print $fh /.../
4917                                                    XXX except DORDOR operator
4918                                                 */
4919                 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
4920                          && s[2] != '=')
4921                     PL_expect = XTERM;          /* print $fh <<"EOF" */
4922             }
4923         }
4924         PL_pending_ident = '$';
4925         TOKEN('$');
4926
4927     case '@':
4928         if (PL_expect == XOPERATOR)
4929             no_op("Array", s);
4930         PL_tokenbuf[0] = '@';
4931         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
4932         if (!PL_tokenbuf[1]) {
4933             PREREF('@');
4934         }
4935         if (PL_lex_state == LEX_NORMAL)
4936             s = SKIPSPACE1(s);
4937         if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
4938             if (*s == '{')
4939                 PL_tokenbuf[0] = '%';
4940
4941             /* Warn about @ where they meant $. */
4942             if (*s == '[' || *s == '{') {
4943                 if (ckWARN(WARN_SYNTAX)) {
4944                     const char *t = s + 1;
4945                     while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
4946                         t++;
4947                     if (*t == '}' || *t == ']') {
4948                         t++;
4949                         PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
4950                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4951                             "Scalar value %.*s better written as $%.*s",
4952                             (int)(t-PL_bufptr), PL_bufptr,
4953                             (int)(t-PL_bufptr-1), PL_bufptr+1);
4954                     }
4955                 }
4956             }
4957         }
4958         PL_pending_ident = '@';
4959         TERM('@');
4960
4961      case '/':                  /* may be division, defined-or, or pattern */
4962         if (PL_expect == XTERMORDORDOR && s[1] == '/') {
4963             s += 2;
4964             AOPERATOR(DORDOR);
4965         }
4966      case '?':                  /* may either be conditional or pattern */
4967          if(PL_expect == XOPERATOR) {
4968              char tmp = *s++;
4969              if(tmp == '?') {
4970                   OPERATOR('?');
4971              }
4972              else {
4973                  tmp = *s++;
4974                  if(tmp == '/') {
4975                      /* A // operator. */
4976                     AOPERATOR(DORDOR);
4977                  }
4978                  else {
4979                      s--;
4980                      Mop(OP_DIVIDE);
4981                  }
4982              }
4983          }
4984          else {
4985              /* Disable warning on "study /blah/" */
4986              if (PL_oldoldbufptr == PL_last_uni
4987               && (*PL_last_uni != 's' || s - PL_last_uni < 5
4988                   || memNE(PL_last_uni, "study", 5)
4989                   || isALNUM_lazy_if(PL_last_uni+5,UTF)
4990               ))
4991                  check_uni();
4992              s = scan_pat(s,OP_MATCH);
4993              TERM(sublex_start());
4994          }
4995
4996     case '.':
4997         if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
4998 #ifdef PERL_STRICT_CR
4999             && s[1] == '\n'
5000 #else
5001             && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
5002 #endif
5003             && (s == PL_linestart || s[-1] == '\n') )
5004         {
5005             PL_lex_formbrack = 0;
5006             PL_expect = XSTATE;
5007             goto rightbracket;
5008         }
5009         if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
5010             char tmp = *s++;
5011             if (*s == tmp) {
5012                 s++;
5013                 if (*s == tmp) {
5014                     s++;
5015                     yylval.ival = OPf_SPECIAL;
5016                 }
5017                 else
5018                     yylval.ival = 0;
5019                 OPERATOR(DOTDOT);
5020             }
5021             if (PL_expect != XOPERATOR)
5022                 check_uni();
5023             Aop(OP_CONCAT);
5024         }
5025         /* FALL THROUGH */
5026     case '0': case '1': case '2': case '3': case '4':
5027     case '5': case '6': case '7': case '8': case '9':
5028         s = scan_num(s, &yylval);
5029         DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
5030         if (PL_expect == XOPERATOR)
5031             no_op("Number",s);
5032         TERM(THING);
5033
5034     case '\'':
5035         s = scan_str(s,!!PL_madskills,FALSE);
5036         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
5037         if (PL_expect == XOPERATOR) {
5038             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5039                 PL_expect = XTERM;
5040                 deprecate_old(commaless_variable_list);
5041                 return REPORT(','); /* grandfather non-comma-format format */
5042             }
5043             else
5044                 no_op("String",s);
5045         }
5046         if (!s)
5047             missingterm(NULL);
5048         yylval.ival = OP_CONST;
5049         TERM(sublex_start());
5050
5051     case '"':
5052         s = scan_str(s,!!PL_madskills,FALSE);
5053         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
5054         if (PL_expect == XOPERATOR) {
5055             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5056                 PL_expect = XTERM;
5057                 deprecate_old(commaless_variable_list);
5058                 return REPORT(','); /* grandfather non-comma-format format */
5059             }
5060             else
5061                 no_op("String",s);
5062         }
5063         if (!s)
5064             missingterm(NULL);
5065         yylval.ival = OP_CONST;
5066         /* FIXME. I think that this can be const if char *d is replaced by
5067            more localised variables.  */
5068         for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
5069             if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
5070                 yylval.ival = OP_STRINGIFY;
5071                 break;
5072             }
5073         }
5074         TERM(sublex_start());
5075
5076     case '`':
5077         s = scan_str(s,!!PL_madskills,FALSE);
5078         DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
5079         if (PL_expect == XOPERATOR)
5080             no_op("Backticks",s);
5081         if (!s)
5082             missingterm(NULL);
5083         readpipe_override();
5084         TERM(sublex_start());
5085
5086     case '\\':
5087         s++;
5088         if (PL_lex_inwhat && isDIGIT(*s) && ckWARN(WARN_SYNTAX))
5089             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
5090                         *s, *s);
5091         if (PL_expect == XOPERATOR)
5092             no_op("Backslash",s);
5093         OPERATOR(REFGEN);
5094
5095     case 'v':
5096         if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
5097             char *start = s + 2;
5098             while (isDIGIT(*start) || *start == '_')
5099                 start++;
5100             if (*start == '.' && isDIGIT(start[1])) {
5101                 s = scan_num(s, &yylval);
5102                 TERM(THING);
5103             }
5104             /* avoid v123abc() or $h{v1}, allow C<print v10;> */
5105             else if (!isALPHA(*start) && (PL_expect == XTERM
5106                         || PL_expect == XREF || PL_expect == XSTATE
5107                         || PL_expect == XTERMORDORDOR)) {
5108                 /* XXX Use gv_fetchpvn rather than stomping on a const string */
5109                 const char c = *start;
5110                 GV *gv;
5111                 *start = '\0';
5112                 gv = gv_fetchpv(s, 0, SVt_PVCV);
5113                 *start = c;
5114                 if (!gv) {
5115                     s = scan_num(s, &yylval);
5116                     TERM(THING);
5117                 }
5118             }
5119         }
5120         goto keylookup;
5121     case 'x':
5122         if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
5123             s++;
5124             Mop(OP_REPEAT);
5125         }
5126         goto keylookup;
5127
5128     case '_':
5129     case 'a': case 'A':
5130     case 'b': case 'B':
5131     case 'c': case 'C':
5132     case 'd': case 'D':
5133     case 'e': case 'E':
5134     case 'f': case 'F':
5135     case 'g': case 'G':
5136     case 'h': case 'H':
5137     case 'i': case 'I':
5138     case 'j': case 'J':
5139     case 'k': case 'K':
5140     case 'l': case 'L':
5141     case 'm': case 'M':
5142     case 'n': case 'N':
5143     case 'o': case 'O':
5144     case 'p': case 'P':
5145     case 'q': case 'Q':
5146     case 'r': case 'R':
5147     case 's': case 'S':
5148     case 't': case 'T':
5149     case 'u': case 'U':
5150               case 'V':
5151     case 'w': case 'W':
5152               case 'X':
5153     case 'y': case 'Y':
5154     case 'z': case 'Z':
5155
5156       keylookup: {
5157         I32 tmp;
5158
5159         orig_keyword = 0;
5160         gv = NULL;
5161         gvp = NULL;
5162
5163         PL_bufptr = s;
5164         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5165
5166         /* Some keywords can be followed by any delimiter, including ':' */
5167         tmp = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) ||
5168                (len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
5169                              (PL_tokenbuf[0] == 'q' &&
5170                               strchr("qwxr", PL_tokenbuf[1])))));
5171
5172         /* x::* is just a word, unless x is "CORE" */
5173         if (!tmp && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
5174             goto just_a_word;
5175
5176         d = s;
5177         while (d < PL_bufend && isSPACE(*d))
5178                 d++;    /* no comments skipped here, or s### is misparsed */
5179
5180         /* Is this a label? */
5181         if (!tmp && PL_expect == XSTATE
5182               && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
5183             s = d + 1;
5184             yylval.pval = CopLABEL_alloc(PL_tokenbuf);
5185             CLINE;
5186             TOKEN(LABEL);
5187         }
5188
5189         /* Check for keywords */
5190         tmp = keyword(PL_tokenbuf, len, 0);
5191
5192         /* Is this a word before a => operator? */
5193         if (*d == '=' && d[1] == '>') {
5194             CLINE;
5195             yylval.opval
5196                 = (OP*)newSVOP(OP_CONST, 0,
5197                                S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
5198             yylval.opval->op_private = OPpCONST_BARE;
5199             TERM(WORD);
5200         }
5201
5202         if (tmp < 0) {                  /* second-class keyword? */
5203             GV *ogv = NULL;     /* override (winner) */
5204             GV *hgv = NULL;     /* hidden (loser) */
5205             if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
5206                 CV *cv;
5207                 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVCV)) &&
5208                     (cv = GvCVu(gv)))
5209                 {
5210                     if (GvIMPORTED_CV(gv))
5211                         ogv = gv;
5212                     else if (! CvMETHOD(cv))
5213                         hgv = gv;
5214                 }
5215                 if (!ogv &&
5216                     (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
5217                     (gv = *gvp) && isGV_with_GP(gv) &&
5218                     GvCVu(gv) && GvIMPORTED_CV(gv))
5219                 {
5220                     ogv = gv;
5221                 }
5222             }
5223             if (ogv) {
5224                 orig_keyword = tmp;
5225                 tmp = 0;                /* overridden by import or by GLOBAL */
5226             }
5227             else if (gv && !gvp
5228                      && -tmp==KEY_lock  /* XXX generalizable kludge */
5229                      && GvCVu(gv))
5230             {
5231                 tmp = 0;                /* any sub overrides "weak" keyword */
5232             }
5233             else {                      /* no override */
5234                 tmp = -tmp;
5235                 if (tmp == KEY_dump && ckWARN(WARN_MISC)) {
5236                     Perl_warner(aTHX_ packWARN(WARN_MISC),
5237                             "dump() better written as CORE::dump()");
5238                 }
5239                 gv = NULL;
5240                 gvp = 0;
5241                 if (hgv && tmp != KEY_x && tmp != KEY_CORE
5242                         && ckWARN(WARN_AMBIGUOUS))      /* never ambiguous */
5243                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5244                         "Ambiguous call resolved as CORE::%s(), %s",
5245                          GvENAME(hgv), "qualify as such or use &");
5246             }
5247         }
5248
5249       reserved_word:
5250         switch (tmp) {
5251
5252         default:                        /* not a keyword */
5253             /* Trade off - by using this evil construction we can pull the
5254                variable gv into the block labelled keylookup. If not, then
5255                we have to give it function scope so that the goto from the
5256                earlier ':' case doesn't bypass the initialisation.  */
5257             if (0) {
5258             just_a_word_zero_gv:
5259                 gv = NULL;
5260                 gvp = NULL;
5261                 orig_keyword = 0;
5262             }
5263           just_a_word: {
5264                 SV *sv;
5265                 int pkgname = 0;
5266                 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
5267                 CV *cv;
5268 #ifdef PERL_MAD
5269                 SV *nextPL_nextwhite = 0;
5270 #endif
5271
5272
5273                 /* Get the rest if it looks like a package qualifier */
5274
5275                 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
5276                     STRLEN morelen;
5277                     s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
5278                                   TRUE, &morelen);
5279                     if (!morelen)
5280                         Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
5281                                 *s == '\'' ? "'" : "::");
5282                     len += morelen;
5283                     pkgname = 1;
5284                 }
5285
5286                 if (PL_expect == XOPERATOR) {
5287                     if (PL_bufptr == PL_linestart) {
5288                         CopLINE_dec(PL_curcop);
5289                         Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
5290                         CopLINE_inc(PL_curcop);
5291                     }
5292                     else
5293                         no_op("Bareword",s);
5294                 }
5295
5296                 /* Look for a subroutine with this name in current package,
5297                    unless name is "Foo::", in which case Foo is a bearword
5298                    (and a package name). */
5299
5300                 if (len > 2 && !PL_madskills &&
5301                     PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
5302                 {
5303                     if (ckWARN(WARN_BAREWORD)
5304                         && ! gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVHV))
5305                         Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
5306                             "Bareword \"%s\" refers to nonexistent package",
5307                              PL_tokenbuf);
5308                     len -= 2;
5309                     PL_tokenbuf[len] = '\0';
5310                     gv = NULL;
5311                     gvp = 0;
5312                 }
5313                 else {
5314                     if (!gv) {
5315                         /* Mustn't actually add anything to a symbol table.
5316                            But also don't want to "initialise" any placeholder
5317                            constants that might already be there into full
5318                            blown PVGVs with attached PVCV.  */
5319                         gv = gv_fetchpvn_flags(PL_tokenbuf, len,
5320                                                GV_NOADD_NOINIT, SVt_PVCV);
5321                     }
5322                     len = 0;
5323                 }
5324
5325                 /* if we saw a global override before, get the right name */
5326
5327                 if (gvp) {
5328                     sv = newSVpvs("CORE::GLOBAL::");
5329                     sv_catpv(sv,PL_tokenbuf);
5330                 }
5331                 else {
5332                     /* If len is 0, newSVpv does strlen(), which is correct.
5333                        If len is non-zero, then it will be the true length,
5334                        and so the scalar will be created correctly.  */
5335                     sv = newSVpv(PL_tokenbuf,len);
5336                 }
5337 #ifdef PERL_MAD
5338                 if (PL_madskills && !PL_thistoken) {
5339                     char *start = SvPVX(PL_linestr) + PL_realtokenstart;
5340                     PL_thistoken = newSVpv(start,s - start);
5341                     PL_realtokenstart = s - SvPVX(PL_linestr);
5342                 }
5343 #endif
5344
5345                 /* Presume this is going to be a bareword of some sort. */
5346
5347                 CLINE;
5348                 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
5349                 yylval.opval->op_private = OPpCONST_BARE;
5350                 /* UTF-8 package name? */
5351                 if (UTF && !IN_BYTES &&
5352                     is_utf8_string((U8*)SvPVX_const(sv), SvCUR(sv)))
5353                     SvUTF8_on(sv);
5354
5355                 /* And if "Foo::", then that's what it certainly is. */
5356
5357                 if (len)
5358                     goto safe_bareword;
5359
5360                 /* Do the explicit type check so that we don't need to force
5361                    the initialisation of the symbol table to have a real GV.
5362                    Beware - gv may not really be a PVGV, cv may not really be
5363                    a PVCV, (because of the space optimisations that gv_init
5364                    understands) But they're true if for this symbol there is
5365                    respectively a typeglob and a subroutine.
5366                 */
5367                 cv = gv ? ((SvTYPE(gv) == SVt_PVGV)
5368                     /* Real typeglob, so get the real subroutine: */
5369                            ? GvCVu(gv)
5370                     /* A proxy for a subroutine in this package? */
5371                            : SvOK(gv) ? (CV *) gv : NULL)
5372                     : NULL;
5373
5374                 /* See if it's the indirect object for a list operator. */
5375
5376                 if (PL_oldoldbufptr &&
5377                     PL_oldoldbufptr < PL_bufptr &&
5378                     (PL_oldoldbufptr == PL_last_lop
5379                      || PL_oldoldbufptr == PL_last_uni) &&
5380                     /* NO SKIPSPACE BEFORE HERE! */
5381                     (PL_expect == XREF ||
5382                      ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
5383                 {
5384                     bool immediate_paren = *s == '(';
5385
5386                     /* (Now we can afford to cross potential line boundary.) */
5387                     s = SKIPSPACE2(s,nextPL_nextwhite);
5388 #ifdef PERL_MAD
5389                     PL_nextwhite = nextPL_nextwhite;    /* assume no & deception */
5390 #endif
5391
5392                     /* Two barewords in a row may indicate method call. */
5393
5394                     if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
5395                         (tmp = intuit_method(s, gv, cv)))
5396                         return REPORT(tmp);
5397
5398                     /* If not a declared subroutine, it's an indirect object. */
5399                     /* (But it's an indir obj regardless for sort.) */
5400                     /* Also, if "_" follows a filetest operator, it's a bareword */
5401
5402                     if (
5403                         ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
5404                          ((!gv || !cv) &&
5405                         (PL_last_lop_op != OP_MAPSTART &&
5406                          PL_last_lop_op != OP_GREPSTART))))
5407                        || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
5408                             && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
5409                        )
5410                     {
5411                         PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
5412                         goto bareword;
5413                     }
5414                 }
5415
5416                 PL_expect = XOPERATOR;
5417 #ifdef PERL_MAD
5418                 if (isSPACE(*s))
5419                     s = SKIPSPACE2(s,nextPL_nextwhite);
5420                 PL_nextwhite = nextPL_nextwhite;
5421 #else
5422                 s = skipspace(s);
5423 #endif
5424
5425                 /* Is this a word before a => operator? */
5426                 if (*s == '=' && s[1] == '>' && !pkgname) {
5427                     CLINE;
5428                     sv_setpv(((SVOP*)yylval.opval)->op_sv, PL_tokenbuf);
5429                     if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
5430                       SvUTF8_on(((SVOP*)yylval.opval)->op_sv);
5431                     TERM(WORD);
5432                 }
5433
5434                 /* If followed by a paren, it's certainly a subroutine. */
5435                 if (*s == '(') {
5436                     CLINE;
5437                     if (cv) {
5438                         d = s + 1;
5439                         while (SPACE_OR_TAB(*d))
5440                             d++;
5441                         if (*d == ')' && (sv = gv_const_sv(gv))) {
5442                             s = d + 1;
5443                             goto its_constant;
5444                         }
5445                     }
5446 #ifdef PERL_MAD
5447                     if (PL_madskills) {
5448                         PL_nextwhite = PL_thiswhite;
5449                         PL_thiswhite = 0;
5450                     }
5451                     start_force(PL_curforce);
5452 #endif
5453                     NEXTVAL_NEXTTOKE.opval = yylval.opval;
5454                     PL_expect = XOPERATOR;
5455 #ifdef PERL_MAD
5456                     if (PL_madskills) {
5457                         PL_nextwhite = nextPL_nextwhite;
5458                         curmad('X', PL_thistoken);
5459                         PL_thistoken = newSVpvs("");
5460                     }
5461 #endif
5462                     force_next(WORD);
5463                     yylval.ival = 0;
5464                     TOKEN('&');
5465                 }
5466
5467                 /* If followed by var or block, call it a method (unless sub) */
5468
5469                 if ((*s == '$' || *s == '{') && (!gv || !cv)) {
5470                     PL_last_lop = PL_oldbufptr;
5471                     PL_last_lop_op = OP_METHOD;
5472                     PREBLOCK(METHOD);
5473                 }
5474
5475                 /* If followed by a bareword, see if it looks like indir obj. */
5476
5477                 if (!orig_keyword
5478                         && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
5479                         && (tmp = intuit_method(s, gv, cv)))
5480                     return REPORT(tmp);
5481
5482                 /* Not a method, so call it a subroutine (if defined) */
5483
5484                 if (cv) {
5485                     if (lastchar == '-' && ckWARN_d(WARN_AMBIGUOUS))
5486                         Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5487                                 "Ambiguous use of -%s resolved as -&%s()",
5488                                 PL_tokenbuf, PL_tokenbuf);
5489                     /* Check for a constant sub */
5490                     if ((sv = gv_const_sv(gv))) {
5491                   its_constant:
5492                         SvREFCNT_dec(((SVOP*)yylval.opval)->op_sv);
5493                         ((SVOP*)yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
5494                         yylval.opval->op_private = 0;
5495                         TOKEN(WORD);
5496                     }
5497
5498                     /* Resolve to GV now. */
5499                     if (SvTYPE(gv) != SVt_PVGV) {
5500                         gv = gv_fetchpv(PL_tokenbuf, 0, SVt_PVCV);
5501                         assert (SvTYPE(gv) == SVt_PVGV);
5502                         /* cv must have been some sort of placeholder, so
5503                            now needs replacing with a real code reference.  */
5504                         cv = GvCV(gv);
5505                     }
5506
5507                     op_free(yylval.opval);
5508                     yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv));
5509                     yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
5510                     PL_last_lop = PL_oldbufptr;
5511                     PL_last_lop_op = OP_ENTERSUB;
5512                     /* Is there a prototype? */
5513                     if (
5514 #ifdef PERL_MAD
5515                         cv &&
5516 #endif
5517                         SvPOK(cv))
5518                     {
5519                         STRLEN protolen;
5520                         const char *proto = SvPV_const((SV*)cv, protolen);
5521                         if (!protolen)
5522                             TERM(FUNC0SUB);
5523                         if ((*proto == '$' || *proto == '_') && proto[1] == '\0')
5524                             OPERATOR(UNIOPSUB);
5525                         while (*proto == ';')
5526                             proto++;
5527                         if (*proto == '&' && *s == '{') {
5528                             sv_setpv(PL_subname,
5529                                      (const char *)
5530                                      (PL_curstash ?
5531                                       "__ANON__" : "__ANON__::__ANON__"));
5532                             PREBLOCK(LSTOPSUB);
5533                         }
5534                     }
5535 #ifdef PERL_MAD
5536                     {
5537                         if (PL_madskills) {
5538                             PL_nextwhite = PL_thiswhite;
5539                             PL_thiswhite = 0;
5540                         }
5541                         start_force(PL_curforce);
5542                         NEXTVAL_NEXTTOKE.opval = yylval.opval;
5543                         PL_expect = XTERM;
5544                         if (PL_madskills) {
5545                             PL_nextwhite = nextPL_nextwhite;
5546                             curmad('X', PL_thistoken);
5547                             PL_thistoken = newSVpvs("");
5548                         }
5549                         force_next(WORD);
5550                         TOKEN(NOAMP);
5551                     }
5552                 }
5553
5554                 /* Guess harder when madskills require "best effort". */
5555                 if (PL_madskills && (!gv || !GvCVu(gv))) {
5556                     int probable_sub = 0;
5557                     if (strchr("\"'`$@%0123456789!*+{[<", *s))
5558                         probable_sub = 1;
5559                     else if (isALPHA(*s)) {
5560                         char tmpbuf[1024];
5561                         STRLEN tmplen;
5562                         d = s;
5563                         d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
5564                         if (!keyword(tmpbuf, tmplen, 0))
5565                             probable_sub = 1;
5566                         else {
5567                             while (d < PL_bufend && isSPACE(*d))
5568                                 d++;
5569                             if (*d == '=' && d[1] == '>')
5570                                 probable_sub = 1;
5571                         }
5572                     }
5573                     if (probable_sub) {
5574                         gv = gv_fetchpv(PL_tokenbuf, GV_ADD, SVt_PVCV);
5575                         op_free(yylval.opval);
5576                         yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv));
5577                         yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
5578                         PL_last_lop = PL_oldbufptr;
5579                         PL_last_lop_op = OP_ENTERSUB;
5580                         PL_nextwhite = PL_thiswhite;
5581                         PL_thiswhite = 0;
5582                         start_force(PL_curforce);
5583                         NEXTVAL_NEXTTOKE.opval = yylval.opval;
5584                         PL_expect = XTERM;
5585                         PL_nextwhite = nextPL_nextwhite;
5586                         curmad('X', PL_thistoken);
5587                         PL_thistoken = newSVpvs("");
5588                         force_next(WORD);
5589                         TOKEN(NOAMP);
5590                     }
5591 #else
5592                     NEXTVAL_NEXTTOKE.opval = yylval.opval;
5593                     PL_expect = XTERM;
5594                     force_next(WORD);
5595                     TOKEN(NOAMP);
5596 #endif
5597                 }
5598
5599                 /* Call it a bare word */
5600
5601                 if (PL_hints & HINT_STRICT_SUBS)
5602                     yylval.opval->op_private |= OPpCONST_STRICT;
5603                 else {
5604                 bareword:
5605                     if (lastchar != '-') {
5606                         if (ckWARN(WARN_RESERVED)) {
5607                             d = PL_tokenbuf;
5608                             while (isLOWER(*d))
5609                                 d++;
5610                             if (!*d && !gv_stashpv(PL_tokenbuf, 0))
5611                                 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
5612                                        PL_tokenbuf);
5613                         }
5614                     }
5615                 }
5616
5617             safe_bareword:
5618                 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
5619                     && ckWARN_d(WARN_AMBIGUOUS)) {
5620                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5621                         "Operator or semicolon missing before %c%s",
5622                         lastchar, PL_tokenbuf);
5623                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5624                         "Ambiguous use of %c resolved as operator %c",
5625                         lastchar, lastchar);
5626                 }
5627                 TOKEN(WORD);
5628             }
5629
5630         case KEY___FILE__:
5631             yylval.opval = (OP*)newSVOP(OP_CONST, 0,
5632                                         newSVpv(CopFILE(PL_curcop),0));
5633             TERM(THING);
5634
5635         case KEY___LINE__:
5636             yylval.opval = (OP*)newSVOP(OP_CONST, 0,
5637                                     Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
5638             TERM(THING);
5639
5640         case KEY___PACKAGE__:
5641             yylval.opval = (OP*)newSVOP(OP_CONST, 0,
5642                                         (PL_curstash
5643                                          ? newSVhek(HvNAME_HEK(PL_curstash))
5644                                          : &PL_sv_undef));
5645             TERM(THING);
5646
5647         case KEY___DATA__:
5648         case KEY___END__: {
5649             GV *gv;
5650             if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
5651                 const char *pname = "main";
5652                 if (PL_tokenbuf[2] == 'D')
5653                     pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
5654                 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), GV_ADD,
5655                                 SVt_PVIO);
5656                 GvMULTI_on(gv);
5657                 if (!GvIO(gv))
5658                     GvIOp(gv) = newIO();
5659                 IoIFP(GvIOp(gv)) = PL_rsfp;
5660 #if defined(HAS_FCNTL) && defined(F_SETFD)
5661                 {
5662                     const int fd = PerlIO_fileno(PL_rsfp);
5663                     fcntl(fd,F_SETFD,fd >= 3);
5664                 }
5665 #endif
5666                 /* Mark this internal pseudo-handle as clean */
5667                 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
5668                 if (PL_preprocess)
5669                     IoTYPE(GvIOp(gv)) = IoTYPE_PIPE;
5670                 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
5671                     IoTYPE(GvIOp(gv)) = IoTYPE_STD;
5672                 else
5673                     IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
5674 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
5675                 /* if the script was opened in binmode, we need to revert
5676                  * it to text mode for compatibility; but only iff it has CRs
5677                  * XXX this is a questionable hack at best. */
5678                 if (PL_bufend-PL_bufptr > 2
5679                     && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
5680                 {
5681                     Off_t loc = 0;
5682                     if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
5683                         loc = PerlIO_tell(PL_rsfp);
5684                         (void)PerlIO_seek(PL_rsfp, 0L, 0);
5685                     }
5686 #ifdef NETWARE
5687                         if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
5688 #else
5689                     if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
5690 #endif  /* NETWARE */
5691 #ifdef PERLIO_IS_STDIO /* really? */
5692 #  if defined(__BORLANDC__)
5693                         /* XXX see note in do_binmode() */
5694                         ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
5695 #  endif
5696 #endif
5697                         if (loc > 0)
5698                             PerlIO_seek(PL_rsfp, loc, 0);
5699                     }
5700                 }
5701 #endif
5702 #ifdef PERLIO_LAYERS
5703                 if (!IN_BYTES) {
5704                     if (UTF)
5705                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
5706                     else if (PL_encoding) {
5707                         SV *name;
5708                         dSP;
5709                         ENTER;
5710                         SAVETMPS;
5711                         PUSHMARK(sp);
5712                         EXTEND(SP, 1);
5713                         XPUSHs(PL_encoding);
5714                         PUTBACK;
5715                         call_method("name", G_SCALAR);
5716                         SPAGAIN;
5717                         name = POPs;
5718                         PUTBACK;
5719                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
5720                                             Perl_form(aTHX_ ":encoding(%"SVf")",
5721                                                       SVfARG(name)));
5722                         FREETMPS;
5723                         LEAVE;
5724                     }
5725                 }
5726 #endif
5727 #ifdef PERL_MAD
5728                 if (PL_madskills) {
5729                     if (PL_realtokenstart >= 0) {
5730                         char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
5731                         if (!PL_endwhite)
5732                             PL_endwhite = newSVpvs("");
5733                         sv_catsv(PL_endwhite, PL_thiswhite);
5734                         PL_thiswhite = 0;
5735                         sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
5736                         PL_realtokenstart = -1;
5737                     }
5738                     while ((s = filter_gets(PL_endwhite, PL_rsfp,
5739                                  SvCUR(PL_endwhite))) != NULL) ;
5740                 }
5741 #endif
5742                 PL_rsfp = NULL;
5743             }
5744             goto fake_eof;
5745         }
5746
5747         case KEY_AUTOLOAD:
5748         case KEY_DESTROY:
5749         case KEY_BEGIN:
5750         case KEY_UNITCHECK:
5751         case KEY_CHECK:
5752         case KEY_INIT:
5753         case KEY_END:
5754             if (PL_expect == XSTATE) {
5755                 s = PL_bufptr;
5756                 goto really_sub;
5757             }
5758             goto just_a_word;
5759
5760         case KEY_CORE:
5761             if (*s == ':' && s[1] == ':') {
5762                 s += 2;
5763                 d = s;
5764                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5765                 if (!(tmp = keyword(PL_tokenbuf, len, 0)))
5766                     Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
5767                 if (tmp < 0)
5768                     tmp = -tmp;
5769                 else if (tmp == KEY_require || tmp == KEY_do)
5770                     /* that's a way to remember we saw "CORE::" */
5771                     orig_keyword = tmp;
5772                 goto reserved_word;
5773             }
5774             goto just_a_word;
5775
5776         case KEY_abs:
5777             UNI(OP_ABS);
5778
5779         case KEY_alarm:
5780             UNI(OP_ALARM);
5781
5782         case KEY_accept:
5783             LOP(OP_ACCEPT,XTERM);
5784
5785         case KEY_and:
5786             OPERATOR(ANDOP);
5787
5788         case KEY_atan2:
5789             LOP(OP_ATAN2,XTERM);
5790
5791         case KEY_bind:
5792             LOP(OP_BIND,XTERM);
5793
5794         case KEY_binmode:
5795             LOP(OP_BINMODE,XTERM);
5796
5797         case KEY_bless:
5798             LOP(OP_BLESS,XTERM);
5799
5800         case KEY_break:
5801             FUN0(OP_BREAK);
5802
5803         case KEY_chop:
5804             UNI(OP_CHOP);
5805
5806         case KEY_continue:
5807             /* When 'use switch' is in effect, continue has a dual
5808                life as a control operator. */
5809             {
5810                 if (!FEATURE_IS_ENABLED("switch"))
5811                     PREBLOCK(CONTINUE);
5812                 else {
5813                     /* We have to disambiguate the two senses of
5814                       "continue". If the next token is a '{' then
5815                       treat it as the start of a continue block;
5816                       otherwise treat it as a control operator.
5817                      */
5818                     s = skipspace(s);
5819                     if (*s == '{')
5820             PREBLOCK(CONTINUE);
5821                     else
5822                         FUN0(OP_CONTINUE);
5823                 }
5824             }
5825
5826         case KEY_chdir:
5827             /* may use HOME */
5828             (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
5829             UNI(OP_CHDIR);
5830
5831         case KEY_close:
5832             UNI(OP_CLOSE);
5833
5834         case KEY_closedir:
5835             UNI(OP_CLOSEDIR);
5836
5837         case KEY_cmp:
5838             Eop(OP_SCMP);
5839
5840         case KEY_caller:
5841             UNI(OP_CALLER);
5842
5843         case KEY_crypt:
5844 #ifdef FCRYPT
5845             if (!PL_cryptseen) {
5846                 PL_cryptseen = TRUE;
5847                 init_des();
5848             }
5849 #endif
5850             LOP(OP_CRYPT,XTERM);
5851
5852         case KEY_chmod:
5853             LOP(OP_CHMOD,XTERM);
5854
5855         case KEY_chown:
5856             LOP(OP_CHOWN,XTERM);
5857
5858         case KEY_connect:
5859             LOP(OP_CONNECT,XTERM);
5860
5861         case KEY_chr:
5862             UNI(OP_CHR);
5863
5864         case KEY_cos:
5865             UNI(OP_COS);
5866
5867         case KEY_chroot:
5868             UNI(OP_CHROOT);
5869
5870         case KEY_default:
5871             PREBLOCK(DEFAULT);
5872
5873         case KEY_do:
5874             s = SKIPSPACE1(s);
5875             if (*s == '{')
5876                 PRETERMBLOCK(DO);
5877             if (*s != '\'')
5878                 s = force_word(s,WORD,TRUE,TRUE,FALSE);
5879             if (orig_keyword == KEY_do) {
5880                 orig_keyword = 0;
5881                 yylval.ival = 1;
5882             }
5883             else
5884                 yylval.ival = 0;
5885             OPERATOR(DO);
5886
5887         case KEY_die:
5888             PL_hints |= HINT_BLOCK_SCOPE;
5889             LOP(OP_DIE,XTERM);
5890
5891         case KEY_defined:
5892             UNI(OP_DEFINED);
5893
5894         case KEY_delete:
5895             UNI(OP_DELETE);
5896
5897         case KEY_dbmopen:
5898             gv_fetchpvs("AnyDBM_File::ISA", GV_ADDMULTI, SVt_PVAV);
5899             LOP(OP_DBMOPEN,XTERM);
5900
5901         case KEY_dbmclose:
5902             UNI(OP_DBMCLOSE);
5903
5904         case KEY_dump:
5905             s = force_word(s,WORD,TRUE,FALSE,FALSE);
5906             LOOPX(OP_DUMP);
5907
5908         case KEY_else:
5909             PREBLOCK(ELSE);
5910
5911         case KEY_elsif:
5912             yylval.ival = CopLINE(PL_curcop);
5913             OPERATOR(ELSIF);
5914
5915         case KEY_eq:
5916             Eop(OP_SEQ);
5917
5918         case KEY_exists:
5919             UNI(OP_EXISTS);
5920
5921         case KEY_exit:
5922             if (PL_madskills)
5923                 UNI(OP_INT);
5924             UNI(OP_EXIT);
5925
5926         case KEY_eval:
5927             s = SKIPSPACE1(s);
5928             PL_expect = (*s == '{') ? XTERMBLOCK : XTERM;
5929             UNIBRACK(OP_ENTEREVAL);
5930
5931         case KEY_eof:
5932             UNI(OP_EOF);
5933
5934         case KEY_exp:
5935             UNI(OP_EXP);
5936
5937         case KEY_each:
5938             UNI(OP_EACH);
5939
5940         case KEY_exec:
5941             LOP(OP_EXEC,XREF);
5942
5943         case KEY_endhostent:
5944             FUN0(OP_EHOSTENT);
5945
5946         case KEY_endnetent:
5947             FUN0(OP_ENETENT);
5948
5949         case KEY_endservent:
5950             FUN0(OP_ESERVENT);
5951
5952         case KEY_endprotoent:
5953             FUN0(OP_EPROTOENT);
5954
5955         case KEY_endpwent:
5956             FUN0(OP_EPWENT);
5957
5958         case KEY_endgrent:
5959             FUN0(OP_EGRENT);
5960
5961         case KEY_for:
5962         case KEY_foreach:
5963             yylval.ival = CopLINE(PL_curcop);
5964             s = SKIPSPACE1(s);
5965             if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
5966                 char *p = s;
5967 #ifdef PERL_MAD
5968                 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
5969 #endif
5970
5971                 if ((PL_bufend - p) >= 3 &&
5972                     strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
5973                     p += 2;
5974                 else if ((PL_bufend - p) >= 4 &&
5975                     strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
5976                     p += 3;
5977                 p = PEEKSPACE(p);
5978                 if (isIDFIRST_lazy_if(p,UTF)) {
5979                     p = scan_ident(p, PL_bufend,
5980                         PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5981                     p = PEEKSPACE(p);
5982                 }
5983                 if (*p != '$')
5984                     Perl_croak(aTHX_ "Missing $ on loop variable");
5985 #ifdef PERL_MAD
5986                 s = SvPVX(PL_linestr) + soff;
5987 #endif
5988             }
5989             OPERATOR(FOR);
5990
5991         case KEY_formline:
5992             LOP(OP_FORMLINE,XTERM);
5993
5994         case KEY_fork:
5995             FUN0(OP_FORK);
5996
5997         case KEY_fcntl:
5998             LOP(OP_FCNTL,XTERM);
5999
6000         case KEY_fileno:
6001             UNI(OP_FILENO);
6002
6003         case KEY_flock:
6004             LOP(OP_FLOCK,XTERM);
6005
6006         case KEY_gt:
6007             Rop(OP_SGT);
6008
6009         case KEY_ge:
6010             Rop(OP_SGE);
6011
6012         case KEY_grep:
6013             LOP(OP_GREPSTART, XREF);
6014
6015         case KEY_goto:
6016             s = force_word(s,WORD,TRUE,FALSE,FALSE);
6017             LOOPX(OP_GOTO);
6018
6019         case KEY_gmtime:
6020             UNI(OP_GMTIME);
6021
6022         case KEY_getc:
6023             UNIDOR(OP_GETC);
6024
6025         case KEY_getppid:
6026             FUN0(OP_GETPPID);
6027
6028         case KEY_getpgrp:
6029             UNI(OP_GETPGRP);
6030
6031         case KEY_getpriority:
6032             LOP(OP_GETPRIORITY,XTERM);
6033
6034         case KEY_getprotobyname:
6035             UNI(OP_GPBYNAME);
6036
6037         case KEY_getprotobynumber:
6038             LOP(OP_GPBYNUMBER,XTERM);
6039
6040         case KEY_getprotoent:
6041             FUN0(OP_GPROTOENT);
6042
6043         case KEY_getpwent:
6044             FUN0(OP_GPWENT);
6045
6046         case KEY_getpwnam:
6047             UNI(OP_GPWNAM);
6048
6049         case KEY_getpwuid:
6050             UNI(OP_GPWUID);
6051
6052         case KEY_getpeername:
6053             UNI(OP_GETPEERNAME);
6054
6055         case KEY_gethostbyname:
6056             UNI(OP_GHBYNAME);
6057
6058         case KEY_gethostbyaddr:
6059             LOP(OP_GHBYADDR,XTERM);
6060
6061         case KEY_gethostent:
6062             FUN0(OP_GHOSTENT);
6063
6064         case KEY_getnetbyname:
6065             UNI(OP_GNBYNAME);
6066
6067         case KEY_getnetbyaddr:
6068             LOP(OP_GNBYADDR,XTERM);
6069
6070         case KEY_getnetent:
6071             FUN0(OP_GNETENT);
6072
6073         case KEY_getservbyname:
6074             LOP(OP_GSBYNAME,XTERM);
6075
6076         case KEY_getservbyport:
6077             LOP(OP_GSBYPORT,XTERM);
6078
6079         case KEY_getservent:
6080             FUN0(OP_GSERVENT);
6081
6082         case KEY_getsockname:
6083             UNI(OP_GETSOCKNAME);
6084
6085         case KEY_getsockopt:
6086             LOP(OP_GSOCKOPT,XTERM);
6087
6088         case KEY_getgrent:
6089             FUN0(OP_GGRENT);
6090
6091         case KEY_getgrnam:
6092             UNI(OP_GGRNAM);
6093
6094         case KEY_getgrgid:
6095             UNI(OP_GGRGID);
6096
6097         case KEY_getlogin:
6098             FUN0(OP_GETLOGIN);
6099
6100         case KEY_given:
6101             yylval.ival = CopLINE(PL_curcop);
6102             OPERATOR(GIVEN);
6103
6104         case KEY_glob:
6105             LOP(OP_GLOB,XTERM);
6106
6107         case KEY_hex:
6108             UNI(OP_HEX);
6109
6110         case KEY_if:
6111             yylval.ival = CopLINE(PL_curcop);
6112             OPERATOR(IF);
6113
6114         case KEY_index:
6115             LOP(OP_INDEX,XTERM);
6116
6117         case KEY_int:
6118             UNI(OP_INT);
6119
6120         case KEY_ioctl:
6121             LOP(OP_IOCTL,XTERM);
6122
6123         case KEY_join:
6124             LOP(OP_JOIN,XTERM);
6125
6126         case KEY_keys:
6127             UNI(OP_KEYS);
6128
6129         case KEY_kill:
6130             LOP(OP_KILL,XTERM);
6131
6132         case KEY_last:
6133             s = force_word(s,WORD,TRUE,FALSE,FALSE);
6134             LOOPX(OP_LAST);
6135
6136         case KEY_lc:
6137             UNI(OP_LC);
6138
6139         case KEY_lcfirst:
6140             UNI(OP_LCFIRST);
6141
6142         case KEY_local:
6143             yylval.ival = 0;
6144             OPERATOR(LOCAL);
6145
6146         case KEY_length:
6147             UNI(OP_LENGTH);
6148
6149         case KEY_lt:
6150             Rop(OP_SLT);
6151
6152         case KEY_le:
6153             Rop(OP_SLE);
6154
6155         case KEY_localtime:
6156             UNI(OP_LOCALTIME);
6157
6158         case KEY_log:
6159             UNI(OP_LOG);
6160
6161         case KEY_link:
6162             LOP(OP_LINK,XTERM);
6163
6164         case KEY_listen:
6165             LOP(OP_LISTEN,XTERM);
6166
6167         case KEY_lock:
6168             UNI(OP_LOCK);
6169
6170         case KEY_lstat:
6171             UNI(OP_LSTAT);
6172
6173         case KEY_m:
6174             s = scan_pat(s,OP_MATCH);
6175             TERM(sublex_start());
6176
6177         case KEY_map:
6178             LOP(OP_MAPSTART, XREF);
6179
6180         case KEY_mkdir:
6181             LOP(OP_MKDIR,XTERM);
6182
6183         case KEY_msgctl:
6184             LOP(OP_MSGCTL,XTERM);
6185
6186         case KEY_msgget:
6187             LOP(OP_MSGGET,XTERM);
6188
6189         case KEY_msgrcv:
6190             LOP(OP_MSGRCV,XTERM);
6191
6192         case KEY_msgsnd:
6193             LOP(OP_MSGSND,XTERM);
6194
6195         case KEY_our:
6196         case KEY_my:
6197         case KEY_state:
6198             PL_in_my = (U16)tmp;
6199             s = SKIPSPACE1(s);
6200             if (isIDFIRST_lazy_if(s,UTF)) {
6201 #ifdef PERL_MAD
6202                 char* start = s;
6203 #endif
6204                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
6205                 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
6206                     goto really_sub;
6207                 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
6208                 if (!PL_in_my_stash) {
6209                     char tmpbuf[1024];
6210                     PL_bufptr = s;
6211                     my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
6212                     yyerror(tmpbuf);
6213                 }
6214 #ifdef PERL_MAD
6215                 if (PL_madskills) {     /* just add type to declarator token */
6216                     sv_catsv(PL_thistoken, PL_nextwhite);
6217                     PL_nextwhite = 0;
6218                     sv_catpvn(PL_thistoken, start, s - start);
6219                 }
6220 #endif
6221             }
6222             yylval.ival = 1;
6223             OPERATOR(MY);
6224
6225         case KEY_next:
6226             s = force_word(s,WORD,TRUE,FALSE,FALSE);
6227             LOOPX(OP_NEXT);
6228
6229         case KEY_ne:
6230             Eop(OP_SNE);
6231
6232         case KEY_no:
6233             s = tokenize_use(0, s);
6234             OPERATOR(USE);
6235
6236         case KEY_not:
6237             if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
6238                 FUN1(OP_NOT);
6239             else
6240                 OPERATOR(NOTOP);
6241
6242         case KEY_open:
6243             s = SKIPSPACE1(s);
6244             if (isIDFIRST_lazy_if(s,UTF)) {
6245                 const char *t;
6246                 for (d = s; isALNUM_lazy_if(d,UTF);)
6247                     d++;
6248                 for (t=d; isSPACE(*t);)
6249                     t++;
6250                 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
6251                     /* [perl #16184] */
6252                     && !(t[0] == '=' && t[1] == '>')
6253                 ) {
6254                     int parms_len = (int)(d-s);
6255                     Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
6256                            "Precedence problem: open %.*s should be open(%.*s)",
6257                             parms_len, s, parms_len, s);
6258                 }
6259             }
6260             LOP(OP_OPEN,XTERM);
6261
6262         case KEY_or:
6263             yylval.ival = OP_OR;
6264             OPERATOR(OROP);
6265
6266         case KEY_ord:
6267             UNI(OP_ORD);
6268
6269         case KEY_oct:
6270             UNI(OP_OCT);
6271
6272         case KEY_opendir:
6273             LOP(OP_OPEN_DIR,XTERM);
6274
6275         case KEY_print:
6276             checkcomma(s,PL_tokenbuf,"filehandle");
6277             LOP(OP_PRINT,XREF);
6278
6279         case KEY_printf:
6280             checkcomma(s,PL_tokenbuf,"filehandle");
6281             LOP(OP_PRTF,XREF);
6282
6283         case KEY_prototype:
6284             UNI(OP_PROTOTYPE);
6285
6286         case KEY_push:
6287             LOP(OP_PUSH,XTERM);
6288
6289         case KEY_pop:
6290             UNIDOR(OP_POP);
6291
6292         case KEY_pos:
6293             UNIDOR(OP_POS);
6294
6295         case KEY_pack:
6296             LOP(OP_PACK,XTERM);
6297
6298         case KEY_package:
6299             s = force_word(s,WORD,FALSE,TRUE,FALSE);
6300             OPERATOR(PACKAGE);
6301
6302         case KEY_pipe:
6303             LOP(OP_PIPE_OP,XTERM);
6304
6305         case KEY_q:
6306             s = scan_str(s,!!PL_madskills,FALSE);
6307             if (!s)
6308                 missingterm(NULL);
6309             yylval.ival = OP_CONST;
6310             TERM(sublex_start());
6311
6312         case KEY_quotemeta:
6313             UNI(OP_QUOTEMETA);
6314
6315         case KEY_qw:
6316             s = scan_str(s,!!PL_madskills,FALSE);
6317             if (!s)
6318                 missingterm(NULL);
6319             PL_expect = XOPERATOR;
6320             force_next(')');
6321             if (SvCUR(PL_lex_stuff)) {
6322                 OP *words = NULL;
6323                 int warned = 0;
6324                 d = SvPV_force(PL_lex_stuff, len);
6325                 while (len) {
6326                     for (; isSPACE(*d) && len; --len, ++d)
6327                         /**/;
6328                     if (len) {
6329                         SV *sv;
6330                         const char *b = d;
6331                         if (!warned && ckWARN(WARN_QW)) {
6332                             for (; !isSPACE(*d) && len; --len, ++d) {
6333                                 if (*d == ',') {
6334                                     Perl_warner(aTHX_ packWARN(WARN_QW),
6335                                         "Possible attempt to separate words with commas");
6336                                     ++warned;
6337                                 }
6338                                 else if (*d == '#') {
6339                                     Perl_warner(aTHX_ packWARN(WARN_QW),
6340                                         "Possible attempt to put comments in qw() list");
6341                                     ++warned;
6342                                 }
6343                             }
6344                         }
6345                         else {
6346                             for (; !isSPACE(*d) && len; --len, ++d)
6347                                 /**/;
6348                         }
6349                         sv = newSVpvn(b, d-b);
6350                         if (DO_UTF8(PL_lex_stuff))
6351                             SvUTF8_on(sv);
6352                         words = append_elem(OP_LIST, words,
6353                                             newSVOP(OP_CONST, 0, tokeq(sv)));
6354                     }
6355                 }
6356                 if (words) {
6357                     start_force(PL_curforce);
6358                     NEXTVAL_NEXTTOKE.opval = words;
6359                     force_next(THING);
6360                 }
6361             }
6362             if (PL_lex_stuff) {
6363                 SvREFCNT_dec(PL_lex_stuff);
6364                 PL_lex_stuff = NULL;
6365             }
6366             PL_expect = XTERM;
6367             TOKEN('(');
6368
6369         case KEY_qq:
6370             s = scan_str(s,!!PL_madskills,FALSE);
6371             if (!s)
6372                 missingterm(NULL);
6373             yylval.ival = OP_STRINGIFY;
6374             if (SvIVX(PL_lex_stuff) == '\'')
6375                 SvIV_set(PL_lex_stuff, 0);      /* qq'$foo' should intepolate */
6376             TERM(sublex_start());
6377
6378         case KEY_qr:
6379             s = scan_pat(s,OP_QR);
6380             TERM(sublex_start());
6381
6382         case KEY_qx:
6383             s = scan_str(s,!!PL_madskills,FALSE);
6384             if (!s)
6385                 missingterm(NULL);
6386             readpipe_override();
6387             TERM(sublex_start());
6388
6389         case KEY_return:
6390             OLDLOP(OP_RETURN);
6391
6392         case KEY_require:
6393             s = SKIPSPACE1(s);
6394             if (isDIGIT(*s)) {
6395                 s = force_version(s, FALSE);
6396             }
6397             else if (*s != 'v' || !isDIGIT(s[1])
6398                     || (s = force_version(s, TRUE), *s == 'v'))
6399             {
6400                 *PL_tokenbuf = '\0';
6401                 s = force_word(s,WORD,TRUE,TRUE,FALSE);
6402                 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
6403                     gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), GV_ADD);
6404                 else if (*s == '<')
6405                     yyerror("<> should be quotes");
6406             }
6407             if (orig_keyword == KEY_require) {
6408                 orig_keyword = 0;
6409                 yylval.ival = 1;
6410             }
6411             else
6412                 yylval.ival = 0;
6413             PL_expect = XTERM;
6414             PL_bufptr = s;
6415             PL_last_uni = PL_oldbufptr;
6416             PL_last_lop_op = OP_REQUIRE;
6417             s = skipspace(s);
6418             return REPORT( (int)REQUIRE );
6419
6420         case KEY_reset:
6421             UNI(OP_RESET);
6422
6423         case KEY_redo:
6424             s = force_word(s,WORD,TRUE,FALSE,FALSE);
6425             LOOPX(OP_REDO);
6426
6427         case KEY_rename:
6428             LOP(OP_RENAME,XTERM);
6429
6430         case KEY_rand:
6431             UNI(OP_RAND);
6432
6433         case KEY_rmdir:
6434             UNI(OP_RMDIR);
6435
6436         case KEY_rindex:
6437             LOP(OP_RINDEX,XTERM);
6438
6439         case KEY_read:
6440             LOP(OP_READ,XTERM);
6441
6442         case KEY_readdir:
6443             UNI(OP_READDIR);
6444
6445         case KEY_readline:
6446             UNIDOR(OP_READLINE);
6447
6448         case KEY_readpipe:
6449             UNIDOR(OP_BACKTICK);
6450
6451         case KEY_rewinddir:
6452             UNI(OP_REWINDDIR);
6453
6454         case KEY_recv:
6455             LOP(OP_RECV,XTERM);
6456
6457         case KEY_reverse:
6458             LOP(OP_REVERSE,XTERM);
6459
6460         case KEY_readlink:
6461             UNIDOR(OP_READLINK);
6462
6463         case KEY_ref:
6464             UNI(OP_REF);
6465
6466         case KEY_s:
6467             s = scan_subst(s);
6468             if (yylval.opval)
6469                 TERM(sublex_start());
6470             else
6471                 TOKEN(1);       /* force error */
6472
6473         case KEY_say:
6474             checkcomma(s,PL_tokenbuf,"filehandle");
6475             LOP(OP_SAY,XREF);
6476
6477         case KEY_chomp:
6478             UNI(OP_CHOMP);
6479
6480         case KEY_scalar:
6481             UNI(OP_SCALAR);
6482
6483         case KEY_select:
6484             LOP(OP_SELECT,XTERM);
6485
6486         case KEY_seek:
6487             LOP(OP_SEEK,XTERM);
6488
6489         case KEY_semctl:
6490             LOP(OP_SEMCTL,XTERM);
6491
6492         case KEY_semget:
6493             LOP(OP_SEMGET,XTERM);
6494
6495         case KEY_semop:
6496             LOP(OP_SEMOP,XTERM);
6497
6498         case KEY_send:
6499             LOP(OP_SEND,XTERM);
6500
6501         case KEY_setpgrp:
6502             LOP(OP_SETPGRP,XTERM);
6503
6504         case KEY_setpriority:
6505             LOP(OP_SETPRIORITY,XTERM);
6506
6507         case KEY_sethostent:
6508             UNI(OP_SHOSTENT);
6509
6510         case KEY_setnetent:
6511             UNI(OP_SNETENT);
6512
6513         case KEY_setservent:
6514             UNI(OP_SSERVENT);
6515
6516         case KEY_setprotoent:
6517             UNI(OP_SPROTOENT);
6518
6519         case KEY_setpwent:
6520             FUN0(OP_SPWENT);
6521
6522         case KEY_setgrent:
6523             FUN0(OP_SGRENT);
6524
6525         case KEY_seekdir:
6526             LOP(OP_SEEKDIR,XTERM);
6527
6528         case KEY_setsockopt:
6529             LOP(OP_SSOCKOPT,XTERM);
6530
6531         case KEY_shift:
6532             UNIDOR(OP_SHIFT);
6533
6534         case KEY_shmctl:
6535             LOP(OP_SHMCTL,XTERM);
6536
6537         case KEY_shmget:
6538             LOP(OP_SHMGET,XTERM);
6539
6540         case KEY_shmread:
6541             LOP(OP_SHMREAD,XTERM);
6542
6543         case KEY_shmwrite:
6544             LOP(OP_SHMWRITE,XTERM);
6545
6546         case KEY_shutdown:
6547             LOP(OP_SHUTDOWN,XTERM);
6548
6549         case KEY_sin:
6550             UNI(OP_SIN);
6551
6552         case KEY_sleep:
6553             UNI(OP_SLEEP);
6554
6555         case KEY_socket:
6556             LOP(OP_SOCKET,XTERM);
6557
6558         case KEY_socketpair:
6559             LOP(OP_SOCKPAIR,XTERM);
6560
6561         case KEY_sort:
6562             checkcomma(s,PL_tokenbuf,"subroutine name");
6563             s = SKIPSPACE1(s);
6564             if (*s == ';' || *s == ')')         /* probably a close */
6565                 Perl_croak(aTHX_ "sort is now a reserved word");
6566             PL_expect = XTERM;
6567             s = force_word(s,WORD,TRUE,TRUE,FALSE);
6568             LOP(OP_SORT,XREF);
6569
6570         case KEY_split:
6571             LOP(OP_SPLIT,XTERM);
6572
6573         case KEY_sprintf:
6574             LOP(OP_SPRINTF,XTERM);
6575
6576         case KEY_splice:
6577             LOP(OP_SPLICE,XTERM);
6578
6579         case KEY_sqrt:
6580             UNI(OP_SQRT);
6581
6582         case KEY_srand:
6583             UNI(OP_SRAND);
6584
6585         case KEY_stat:
6586             UNI(OP_STAT);
6587
6588         case KEY_study:
6589             UNI(OP_STUDY);
6590
6591         case KEY_substr:
6592             LOP(OP_SUBSTR,XTERM);
6593
6594         case KEY_format:
6595         case KEY_sub:
6596           really_sub:
6597             {
6598                 char tmpbuf[sizeof PL_tokenbuf];
6599                 SSize_t tboffset = 0;
6600                 expectation attrful;
6601                 bool have_name, have_proto;
6602                 const int key = tmp;
6603
6604 #ifdef PERL_MAD
6605                 SV *tmpwhite = 0;
6606
6607                 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
6608                 SV *subtoken = newSVpvn(tstart, s - tstart);
6609                 PL_thistoken = 0;
6610
6611                 d = s;
6612                 s = SKIPSPACE2(s,tmpwhite);
6613 #else
6614                 s = skipspace(s);
6615 #endif
6616
6617                 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
6618                     (*s == ':' && s[1] == ':'))
6619                 {
6620 #ifdef PERL_MAD
6621                     SV *nametoke;
6622 #endif
6623
6624                     PL_expect = XBLOCK;
6625                     attrful = XATTRBLOCK;
6626                     /* remember buffer pos'n for later force_word */
6627                     tboffset = s - PL_oldbufptr;
6628                     d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6629 #ifdef PERL_MAD
6630                     if (PL_madskills)
6631                         nametoke = newSVpvn(s, d - s);
6632 #endif
6633                     if (memchr(tmpbuf, ':', len))
6634                         sv_setpvn(PL_subname, tmpbuf, len);
6635                     else {
6636                         sv_setsv(PL_subname,PL_curstname);
6637                         sv_catpvs(PL_subname,"::");
6638                         sv_catpvn(PL_subname,tmpbuf,len);
6639                     }
6640                     have_name = TRUE;
6641
6642 #ifdef PERL_MAD
6643
6644                     start_force(0);
6645                     CURMAD('X', nametoke);
6646                     CURMAD('_', tmpwhite);
6647                     (void) force_word(PL_oldbufptr + tboffset, WORD,
6648                                       FALSE, TRUE, TRUE);
6649
6650                     s = SKIPSPACE2(d,tmpwhite);
6651 #else
6652                     s = skipspace(d);
6653 #endif
6654                 }
6655                 else {
6656                     if (key == KEY_my)
6657                         Perl_croak(aTHX_ "Missing name in \"my sub\"");
6658                     PL_expect = XTERMBLOCK;
6659                     attrful = XATTRTERM;
6660                     sv_setpvn(PL_subname,"?",1);
6661                     have_name = FALSE;
6662                 }
6663
6664                 if (key == KEY_format) {
6665                     if (*s == '=')
6666                         PL_lex_formbrack = PL_lex_brackets + 1;
6667 #ifdef PERL_MAD
6668                     PL_thistoken = subtoken;
6669                     s = d;
6670 #else
6671                     if (have_name)
6672                         (void) force_word(PL_oldbufptr + tboffset, WORD,
6673                                           FALSE, TRUE, TRUE);
6674 #endif
6675                     OPERATOR(FORMAT);
6676                 }
6677
6678                 /* Look for a prototype */
6679                 if (*s == '(') {
6680                     char *p;
6681                     bool bad_proto = FALSE;
6682                     const bool warnsyntax = ckWARN(WARN_SYNTAX);
6683
6684                     s = scan_str(s,!!PL_madskills,FALSE);
6685                     if (!s)
6686                         Perl_croak(aTHX_ "Prototype not terminated");
6687                     /* strip spaces and check for bad characters */
6688                     d = SvPVX(PL_lex_stuff);
6689                     tmp = 0;
6690                     for (p = d; *p; ++p) {
6691                         if (!isSPACE(*p)) {
6692                             d[tmp++] = *p;
6693                             if (warnsyntax && !strchr("$@%*;[]&\\_", *p))
6694                                 bad_proto = TRUE;
6695                         }
6696                     }
6697                     d[tmp] = '\0';
6698                     if (bad_proto)
6699                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6700                                     "Illegal character in prototype for %"SVf" : %s",
6701                                     SVfARG(PL_subname), d);
6702                     SvCUR_set(PL_lex_stuff, tmp);
6703                     have_proto = TRUE;
6704
6705 #ifdef PERL_MAD
6706                     start_force(0);
6707                     CURMAD('q', PL_thisopen);
6708                     CURMAD('_', tmpwhite);
6709                     CURMAD('=', PL_thisstuff);
6710                     CURMAD('Q', PL_thisclose);
6711                     NEXTVAL_NEXTTOKE.opval =
6712                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
6713                     PL_lex_stuff = NULL;
6714                     force_next(THING);
6715
6716                     s = SKIPSPACE2(s,tmpwhite);
6717 #else
6718                     s = skipspace(s);
6719 #endif
6720                 }
6721                 else
6722                     have_proto = FALSE;
6723
6724                 if (*s == ':' && s[1] != ':')
6725                     PL_expect = attrful;
6726                 else if (*s != '{' && key == KEY_sub) {
6727                     if (!have_name)
6728                         Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
6729                     else if (*s != ';')
6730                         Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
6731                 }
6732
6733 #ifdef PERL_MAD
6734                 start_force(0);
6735                 if (tmpwhite) {
6736                     if (PL_madskills)
6737                         curmad('^', newSVpvs(""));
6738                     CURMAD('_', tmpwhite);
6739                 }
6740                 force_next(0);
6741
6742                 PL_thistoken = subtoken;
6743 #else
6744                 if (have_proto) {
6745                     NEXTVAL_NEXTTOKE.opval =
6746                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
6747                     PL_lex_stuff = NULL;
6748                     force_next(THING);
6749                 }
6750 #endif
6751                 if (!have_name) {
6752                     sv_setpv(PL_subname,
6753                              (const char *)
6754                              (PL_curstash ? "__ANON__" : "__ANON__::__ANON__"));
6755                     TOKEN(ANONSUB);
6756                 }
6757 #ifndef PERL_MAD
6758                 (void) force_word(PL_oldbufptr + tboffset, WORD,
6759                                   FALSE, TRUE, TRUE);
6760 #endif
6761                 if (key == KEY_my)
6762                     TOKEN(MYSUB);
6763                 TOKEN(SUB);
6764             }
6765
6766         case KEY_system:
6767             LOP(OP_SYSTEM,XREF);
6768
6769         case KEY_symlink:
6770             LOP(OP_SYMLINK,XTERM);
6771
6772         case KEY_syscall:
6773             LOP(OP_SYSCALL,XTERM);
6774
6775         case KEY_sysopen:
6776             LOP(OP_SYSOPEN,XTERM);
6777
6778         case KEY_sysseek:
6779             LOP(OP_SYSSEEK,XTERM);
6780
6781         case KEY_sysread:
6782             LOP(OP_SYSREAD,XTERM);
6783
6784         case KEY_syswrite:
6785             LOP(OP_SYSWRITE,XTERM);
6786
6787         case KEY_tr:
6788             s = scan_trans(s);
6789             TERM(sublex_start());
6790
6791         case KEY_tell:
6792             UNI(OP_TELL);
6793
6794         case KEY_telldir:
6795             UNI(OP_TELLDIR);
6796
6797         case KEY_tie:
6798             LOP(OP_TIE,XTERM);
6799
6800         case KEY_tied:
6801             UNI(OP_TIED);
6802
6803         case KEY_time:
6804             FUN0(OP_TIME);
6805
6806         case KEY_times:
6807             FUN0(OP_TMS);
6808
6809         case KEY_truncate:
6810             LOP(OP_TRUNCATE,XTERM);
6811
6812         case KEY_uc:
6813             UNI(OP_UC);
6814
6815         case KEY_ucfirst:
6816             UNI(OP_UCFIRST);
6817
6818         case KEY_untie:
6819             UNI(OP_UNTIE);
6820
6821         case KEY_until:
6822             yylval.ival = CopLINE(PL_curcop);
6823             OPERATOR(UNTIL);
6824
6825         case KEY_unless:
6826             yylval.ival = CopLINE(PL_curcop);
6827             OPERATOR(UNLESS);
6828
6829         case KEY_unlink:
6830             LOP(OP_UNLINK,XTERM);
6831
6832         case KEY_undef:
6833             UNIDOR(OP_UNDEF);
6834
6835         case KEY_unpack:
6836             LOP(OP_UNPACK,XTERM);
6837
6838         case KEY_utime:
6839             LOP(OP_UTIME,XTERM);
6840
6841         case KEY_umask:
6842             UNIDOR(OP_UMASK);
6843
6844         case KEY_unshift:
6845             LOP(OP_UNSHIFT,XTERM);
6846
6847         case KEY_use:
6848             s = tokenize_use(1, s);
6849             OPERATOR(USE);
6850
6851         case KEY_values:
6852             UNI(OP_VALUES);
6853
6854         case KEY_vec:
6855             LOP(OP_VEC,XTERM);
6856
6857         case KEY_when:
6858             yylval.ival = CopLINE(PL_curcop);
6859             OPERATOR(WHEN);
6860
6861         case KEY_while:
6862             yylval.ival = CopLINE(PL_curcop);
6863             OPERATOR(WHILE);
6864
6865         case KEY_warn:
6866             PL_hints |= HINT_BLOCK_SCOPE;
6867             LOP(OP_WARN,XTERM);
6868
6869         case KEY_wait:
6870             FUN0(OP_WAIT);
6871
6872         case KEY_waitpid:
6873             LOP(OP_WAITPID,XTERM);
6874
6875         case KEY_wantarray:
6876             FUN0(OP_WANTARRAY);
6877
6878         case KEY_write:
6879 #ifdef EBCDIC
6880         {
6881             char ctl_l[2];
6882             ctl_l[0] = toCTRL('L');
6883             ctl_l[1] = '\0';
6884             gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
6885         }
6886 #else
6887             /* Make sure $^L is defined */
6888             gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
6889 #endif
6890             UNI(OP_ENTERWRITE);
6891
6892         case KEY_x:
6893             if (PL_expect == XOPERATOR)
6894                 Mop(OP_REPEAT);
6895             check_uni();
6896             goto just_a_word;
6897
6898         case KEY_xor:
6899             yylval.ival = OP_XOR;
6900             OPERATOR(OROP);
6901
6902         case KEY_y:
6903             s = scan_trans(s);
6904             TERM(sublex_start());
6905         }
6906     }}
6907 }
6908 #ifdef __SC__
6909 #pragma segment Main
6910 #endif
6911
6912 static int
6913 S_pending_ident(pTHX)
6914 {
6915     dVAR;
6916     register char *d;
6917     PADOFFSET tmp = 0;
6918     /* pit holds the identifier we read and pending_ident is reset */
6919     char pit = PL_pending_ident;
6920     PL_pending_ident = 0;
6921
6922     /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
6923     DEBUG_T({ PerlIO_printf(Perl_debug_log,
6924           "### Pending identifier '%s'\n", PL_tokenbuf); });
6925
6926     /* if we're in a my(), we can't allow dynamics here.
6927        $foo'bar has already been turned into $foo::bar, so
6928        just check for colons.
6929
6930        if it's a legal name, the OP is a PADANY.
6931     */
6932     if (PL_in_my) {
6933         if (PL_in_my == KEY_our) {      /* "our" is merely analogous to "my" */
6934             if (strchr(PL_tokenbuf,':'))
6935                 yyerror(Perl_form(aTHX_ "No package name allowed for "
6936                                   "variable %s in \"our\"",
6937                                   PL_tokenbuf));
6938             tmp = allocmy(PL_tokenbuf);
6939         }
6940         else {
6941             if (strchr(PL_tokenbuf,':'))
6942                 yyerror(Perl_form(aTHX_ PL_no_myglob,
6943                             PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf));
6944
6945             yylval.opval = newOP(OP_PADANY, 0);
6946             yylval.opval->op_targ = allocmy(PL_tokenbuf);
6947             return PRIVATEREF;
6948         }
6949     }
6950
6951     /*
6952        build the ops for accesses to a my() variable.
6953
6954        Deny my($a) or my($b) in a sort block, *if* $a or $b is
6955        then used in a comparison.  This catches most, but not
6956        all cases.  For instance, it catches
6957            sort { my($a); $a <=> $b }
6958        but not
6959            sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
6960        (although why you'd do that is anyone's guess).
6961     */
6962
6963     if (!strchr(PL_tokenbuf,':')) {
6964         if (!PL_in_my)
6965             tmp = pad_findmy(PL_tokenbuf);
6966         if (tmp != NOT_IN_PAD) {
6967             /* might be an "our" variable" */
6968             if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
6969                 /* build ops for a bareword */
6970                 HV *  const stash = PAD_COMPNAME_OURSTASH(tmp);
6971                 HEK * const stashname = HvNAME_HEK(stash);
6972                 SV *  const sym = newSVhek(stashname);
6973                 sv_catpvs(sym, "::");
6974                 sv_catpv(sym, PL_tokenbuf+1);
6975                 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
6976                 yylval.opval->op_private = OPpCONST_ENTERED;
6977                 gv_fetchsv(sym,
6978                     (PL_in_eval
6979                         ? (GV_ADDMULTI | GV_ADDINEVAL)
6980                         : GV_ADDMULTI
6981                     ),
6982                     ((PL_tokenbuf[0] == '$') ? SVt_PV
6983                      : (PL_tokenbuf[0] == '@') ? SVt_PVAV
6984                      : SVt_PVHV));
6985                 return WORD;
6986             }
6987
6988             /* if it's a sort block and they're naming $a or $b */
6989             if (PL_last_lop_op == OP_SORT &&
6990                 PL_tokenbuf[0] == '$' &&
6991                 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
6992                 && !PL_tokenbuf[2])
6993             {
6994                 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
6995                      d < PL_bufend && *d != '\n';
6996                      d++)
6997                 {
6998                     if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
6999                         Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
7000                               PL_tokenbuf);
7001                     }
7002                 }
7003             }
7004
7005             yylval.opval = newOP(OP_PADANY, 0);
7006             yylval.opval->op_targ = tmp;
7007             return PRIVATEREF;
7008         }
7009     }
7010
7011     /*
7012        Whine if they've said @foo in a doublequoted string,
7013        and @foo isn't a variable we can find in the symbol
7014        table.
7015     */
7016     if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
7017         GV *gv = gv_fetchpv(PL_tokenbuf+1, 0, SVt_PVAV);
7018         if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
7019                 && ckWARN(WARN_AMBIGUOUS)
7020                 /* DO NOT warn for @- and @+ */
7021                 && !( PL_tokenbuf[2] == '\0' &&
7022                     ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
7023            )
7024         {
7025             /* Downgraded from fatal to warning 20000522 mjd */
7026             Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
7027                         "Possible unintended interpolation of %s in string",
7028                          PL_tokenbuf);
7029         }
7030     }
7031
7032     /* build ops for a bareword */
7033     yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0));
7034     yylval.opval->op_private = OPpCONST_ENTERED;
7035     gv_fetchpv(
7036             PL_tokenbuf+1,
7037             /* If the identifier refers to a stash, don't autovivify it.
7038              * Change 24660 had the side effect of causing symbol table
7039              * hashes to always be defined, even if they were freshly
7040              * created and the only reference in the entire program was
7041              * the single statement with the defined %foo::bar:: test.
7042              * It appears that all code in the wild doing this actually
7043              * wants to know whether sub-packages have been loaded, so
7044              * by avoiding auto-vivifying symbol tables, we ensure that
7045              * defined %foo::bar:: continues to be false, and the existing
7046              * tests still give the expected answers, even though what
7047              * they're actually testing has now changed subtly.
7048              */
7049             (*PL_tokenbuf == '%' && *(d = PL_tokenbuf + strlen(PL_tokenbuf) - 1) == ':' && d[-1] == ':'
7050              ? 0
7051              : PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD),
7052             ((PL_tokenbuf[0] == '$') ? SVt_PV
7053              : (PL_tokenbuf[0] == '@') ? SVt_PVAV
7054              : SVt_PVHV));
7055     return WORD;
7056 }
7057
7058 /*
7059  *  The following code was generated by perl_keyword.pl.
7060  */
7061
7062 I32
7063 Perl_keyword (pTHX_ const char *name, I32 len, bool all_keywords)
7064 {
7065     dVAR;
7066   switch (len)
7067   {
7068     case 1: /* 5 tokens of length 1 */
7069       switch (name[0])
7070       {
7071         case 'm':
7072           {                                       /* m          */
7073             return KEY_m;
7074           }
7075
7076         case 'q':
7077           {                                       /* q          */
7078             return KEY_q;
7079           }
7080
7081         case 's':
7082           {                                       /* s          */
7083             return KEY_s;
7084           }
7085
7086         case 'x':
7087           {                                       /* x          */
7088             return -KEY_x;
7089           }
7090
7091         case 'y':
7092           {                                       /* y          */
7093             return KEY_y;
7094           }
7095
7096         default:
7097           goto unknown;
7098       }
7099
7100     case 2: /* 18 tokens of length 2 */
7101       switch (name[0])
7102       {
7103         case 'd':
7104           if (name[1] == 'o')
7105           {                                       /* do         */
7106             return KEY_do;
7107           }
7108
7109           goto unknown;
7110
7111         case 'e':
7112           if (name[1] == 'q')
7113           {                                       /* eq         */
7114             return -KEY_eq;
7115           }
7116
7117           goto unknown;
7118
7119         case 'g':
7120           switch (name[1])
7121           {
7122             case 'e':
7123               {                                   /* ge         */
7124                 return -KEY_ge;
7125               }
7126
7127             case 't':
7128               {                                   /* gt         */
7129                 return -KEY_gt;
7130               }
7131
7132             default:
7133               goto unknown;
7134           }
7135
7136         case 'i':
7137           if (name[1] == 'f')
7138           {                                       /* if         */
7139             return KEY_if;
7140           }
7141
7142           goto unknown;
7143
7144         case 'l':
7145           switch (name[1])
7146           {
7147             case 'c':
7148               {                                   /* lc         */
7149                 return -KEY_lc;
7150               }
7151
7152             case 'e':
7153               {                                   /* le         */
7154                 return -KEY_le;
7155               }
7156
7157             case 't':
7158               {                                   /* lt         */
7159                 return -KEY_lt;
7160               }
7161
7162             default:
7163               goto unknown;
7164           }
7165
7166         case 'm':
7167           if (name[1] == 'y')
7168           {                                       /* my         */
7169             return KEY_my;
7170           }
7171
7172           goto unknown;
7173
7174         case 'n':
7175           switch (name[1])
7176           {
7177             case 'e':
7178               {                                   /* ne         */
7179                 return -KEY_ne;
7180               }
7181
7182             case 'o':
7183               {                                   /* no         */
7184                 return KEY_no;
7185               }
7186
7187             default:
7188               goto unknown;
7189           }
7190
7191         case 'o':
7192           if (name[1] == 'r')
7193           {                                       /* or         */
7194             return -KEY_or;
7195           }
7196
7197           goto unknown;
7198
7199         case 'q':
7200           switch (name[1])
7201           {
7202             case 'q':
7203               {                                   /* qq         */
7204                 return KEY_qq;
7205               }
7206
7207             case 'r':
7208               {                                   /* qr         */
7209                 return KEY_qr;
7210               }
7211
7212             case 'w':
7213               {                                   /* qw         */
7214                 return KEY_qw;
7215               }
7216
7217             case 'x':
7218               {                                   /* qx         */
7219                 return KEY_qx;
7220               }
7221
7222             default:
7223               goto unknown;
7224           }
7225
7226         case 't':
7227           if (name[1] == 'r')
7228           {                                       /* tr         */
7229             return KEY_tr;
7230           }
7231
7232           goto unknown;
7233
7234         case 'u':
7235           if (name[1] == 'c')
7236           {                                       /* uc         */
7237             return -KEY_uc;
7238           }
7239
7240           goto unknown;
7241
7242         default:
7243           goto unknown;
7244       }
7245
7246     case 3: /* 29 tokens of length 3 */
7247       switch (name[0])
7248       {
7249         case 'E':
7250           if (name[1] == 'N' &&
7251               name[2] == 'D')
7252           {                                       /* END        */
7253             return KEY_END;
7254           }
7255
7256           goto unknown;
7257
7258         case 'a':
7259           switch (name[1])
7260           {
7261             case 'b':
7262               if (name[2] == 's')
7263               {                                   /* abs        */
7264                 return -KEY_abs;
7265               }
7266
7267               goto unknown;
7268
7269             case 'n':
7270               if (name[2] == 'd')
7271               {                                   /* and        */
7272                 return -KEY_and;
7273               }
7274
7275               goto unknown;
7276
7277             default:
7278               goto unknown;
7279           }
7280
7281         case 'c':
7282           switch (name[1])
7283           {
7284             case 'h':
7285               if (name[2] == 'r')
7286               {                                   /* chr        */
7287                 return -KEY_chr;
7288               }
7289
7290               goto unknown;
7291
7292             case 'm':
7293               if (name[2] == 'p')
7294               {                                   /* cmp        */
7295                 return -KEY_cmp;
7296               }
7297
7298               goto unknown;
7299
7300             case 'o':
7301               if (name[2] == 's')
7302               {                                   /* cos        */
7303                 return -KEY_cos;
7304               }
7305
7306               goto unknown;
7307
7308             default:
7309               goto unknown;
7310           }
7311
7312         case 'd':
7313           if (name[1] == 'i' &&
7314               name[2] == 'e')
7315           {                                       /* die        */
7316             return -KEY_die;
7317           }
7318
7319           goto unknown;
7320
7321         case 'e':
7322           switch (name[1])
7323           {
7324             case 'o':
7325               if (name[2] == 'f')
7326               {                                   /* eof        */
7327                 return -KEY_eof;
7328               }
7329
7330               goto unknown;
7331
7332             case 'x':
7333               if (name[2] == 'p')
7334               {                                   /* exp        */
7335                 return -KEY_exp;
7336               }
7337
7338               goto unknown;
7339
7340             default:
7341               goto unknown;
7342           }
7343
7344         case 'f':
7345           if (name[1] == 'o' &&
7346               name[2] == 'r')
7347           {                                       /* for        */
7348             return KEY_for;
7349           }
7350
7351           goto unknown;
7352
7353         case 'h':
7354           if (name[1] == 'e' &&
7355               name[2] == 'x')
7356           {                                       /* hex        */
7357             return -KEY_hex;
7358           }
7359
7360           goto unknown;
7361
7362         case 'i':
7363           if (name[1] == 'n' &&
7364               name[2] == 't')
7365           {                                       /* int        */
7366             return -KEY_int;
7367           }
7368
7369           goto unknown;
7370
7371         case 'l':
7372           if (name[1] == 'o' &&
7373               name[2] == 'g')
7374           {                                       /* log        */
7375             return -KEY_log;
7376           }
7377
7378           goto unknown;
7379
7380         case 'm':
7381           if (name[1] == 'a' &&
7382               name[2] == 'p')
7383           {                                       /* map        */
7384             return KEY_map;
7385           }
7386
7387           goto unknown;
7388
7389         case 'n':
7390           if (name[1] == 'o' &&
7391               name[2] == 't')
7392           {                                       /* not        */
7393             return -KEY_not;
7394           }
7395
7396           goto unknown;
7397
7398         case 'o':
7399           switch (name[1])
7400           {
7401             case 'c':
7402               if (name[2] == 't')
7403               {                                   /* oct        */
7404                 return -KEY_oct;
7405               }
7406
7407               goto unknown;
7408
7409             case 'r':
7410               if (name[2] == 'd')
7411               {                                   /* ord        */
7412                 return -KEY_ord;
7413               }
7414
7415               goto unknown;
7416
7417             case 'u':
7418               if (name[2] == 'r')
7419               {                                   /* our        */
7420                 return KEY_our;
7421               }
7422
7423               goto unknown;
7424
7425             default:
7426               goto unknown;
7427           }
7428
7429         case 'p':
7430           if (name[1] == 'o')
7431           {
7432             switch (name[2])
7433             {
7434               case 'p':
7435                 {                                 /* pop        */
7436                   return -KEY_pop;
7437                 }
7438
7439               case 's':
7440                 {                                 /* pos        */
7441                   return KEY_pos;
7442                 }
7443
7444               default:
7445                 goto unknown;
7446             }
7447           }
7448
7449           goto unknown;
7450
7451         case 'r':
7452           if (name[1] == 'e' &&
7453               name[2] == 'f')
7454           {                                       /* ref        */
7455             return -KEY_ref;
7456           }
7457
7458           goto unknown;
7459
7460         case 's':
7461           switch (name[1])
7462           {
7463             case 'a':
7464               if (name[2] == 'y')
7465               {                                   /* say        */
7466                 return (all_keywords || FEATURE_IS_ENABLED("say") ? KEY_say : 0);
7467               }
7468
7469               goto unknown;
7470
7471             case 'i':
7472               if (name[2] == 'n')
7473               {                                   /* sin        */
7474                 return -KEY_sin;
7475               }
7476
7477               goto unknown;
7478
7479             case 'u':
7480               if (name[2] == 'b')
7481               {                                   /* sub        */
7482                 return KEY_sub;
7483               }
7484
7485               goto unknown;
7486
7487             default:
7488               goto unknown;
7489           }
7490
7491         case 't':
7492           if (name[1] == 'i' &&
7493               name[2] == 'e')
7494           {                                       /* tie        */
7495             return KEY_tie;
7496           }
7497
7498           goto unknown;
7499
7500         case 'u':
7501           if (name[1] == 's' &&
7502               name[2] == 'e')
7503           {                                       /* use        */
7504             return KEY_use;
7505           }
7506
7507           goto unknown;
7508
7509         case 'v':
7510           if (name[1] == 'e' &&
7511               name[2] == 'c')
7512           {                                       /* vec        */
7513             return -KEY_vec;
7514           }
7515
7516           goto unknown;
7517
7518         case 'x':
7519           if (name[1] == 'o' &&
7520               name[2] == 'r')
7521           {                                       /* xor        */
7522             return -KEY_xor;
7523           }
7524
7525           goto unknown;
7526
7527         default:
7528           goto unknown;
7529       }
7530
7531     case 4: /* 41 tokens of length 4 */
7532       switch (name[0])
7533       {
7534         case 'C':
7535           if (name[1] == 'O' &&
7536               name[2] == 'R' &&
7537               name[3] == 'E')
7538           {                                       /* CORE       */
7539             return -KEY_CORE;
7540           }
7541
7542           goto unknown;
7543
7544         case 'I':
7545           if (name[1] == 'N' &&
7546               name[2] == 'I' &&
7547               name[3] == 'T')
7548           {                                       /* INIT       */
7549             return KEY_INIT;
7550           }
7551
7552           goto unknown;
7553
7554         case 'b':
7555           if (name[1] == 'i' &&
7556               name[2] == 'n' &&
7557               name[3] == 'd')
7558           {                                       /* bind       */
7559             return -KEY_bind;
7560           }
7561
7562           goto unknown;
7563
7564         case 'c':
7565           if (name[1] == 'h' &&
7566               name[2] == 'o' &&
7567               name[3] == 'p')
7568           {                                       /* chop       */
7569             return -KEY_chop;
7570           }
7571
7572           goto unknown;
7573
7574         case 'd':
7575           if (name[1] == 'u' &&
7576               name[2] == 'm' &&
7577               name[3] == 'p')
7578           {                                       /* dump       */
7579             return -KEY_dump;
7580           }
7581
7582           goto unknown;
7583
7584         case 'e':
7585           switch (name[1])
7586           {
7587             case 'a':
7588               if (name[2] == 'c' &&
7589                   name[3] == 'h')
7590               {                                   /* each       */
7591                 return -KEY_each;
7592               }
7593
7594               goto unknown;
7595
7596             case 'l':
7597               if (name[2] == 's' &&
7598                   name[3] == 'e')
7599               {                                   /* else       */
7600                 return KEY_else;
7601               }
7602
7603               goto unknown;
7604
7605             case 'v':
7606               if (name[2] == 'a' &&
7607                   name[3] == 'l')
7608               {                                   /* eval       */
7609                 return KEY_eval;
7610               }
7611
7612               goto unknown;
7613
7614             case 'x':
7615               switch (name[2])
7616               {
7617                 case 'e':
7618                   if (name[3] == 'c')
7619                   {                               /* exec       */
7620                     return -KEY_exec;
7621                   }
7622
7623                   goto unknown;
7624
7625                 case 'i':
7626                   if (name[3] == 't')
7627                   {                               /* exit       */
7628                     return -KEY_exit;
7629                   }
7630
7631                   goto unknown;
7632
7633                 default:
7634                   goto unknown;
7635               }
7636
7637             default:
7638               goto unknown;
7639           }
7640
7641         case 'f':
7642           if (name[1] == 'o' &&
7643               name[2] == 'r' &&
7644               name[3] == 'k')
7645           {                                       /* fork       */
7646             return -KEY_fork;
7647           }
7648
7649           goto unknown;
7650
7651         case 'g':
7652           switch (name[1])
7653           {
7654             case 'e':
7655               if (name[2] == 't' &&
7656                   name[3] == 'c')
7657               {                                   /* getc       */
7658                 return -KEY_getc;
7659               }
7660
7661               goto unknown;
7662
7663             case 'l':
7664               if (name[2] == 'o' &&
7665                   name[3] == 'b')
7666               {                                   /* glob       */
7667                 return KEY_glob;
7668               }
7669
7670               goto unknown;
7671
7672             case 'o':
7673               if (name[2] == 't' &&
7674                   name[3] == 'o')
7675               {                                   /* goto       */
7676                 return KEY_goto;
7677               }
7678
7679               goto unknown;
7680
7681             case 'r':
7682               if (name[2] == 'e' &&
7683                   name[3] == 'p')
7684               {                                   /* grep       */
7685                 return KEY_grep;
7686               }
7687
7688               goto unknown;
7689
7690             default:
7691               goto unknown;
7692           }
7693
7694         case 'j':
7695           if (name[1] == 'o' &&
7696               name[2] == 'i' &&
7697               name[3] == 'n')
7698           {                                       /* join       */
7699             return -KEY_join;
7700           }
7701
7702           goto unknown;
7703
7704         case 'k':
7705           switch (name[1])
7706           {
7707             case 'e':
7708               if (name[2] == 'y' &&
7709                   name[3] == 's')
7710               {                                   /* keys       */
7711                 return -KEY_keys;
7712               }
7713
7714               goto unknown;
7715
7716             case 'i':
7717               if (name[2] == 'l' &&
7718                   name[3] == 'l')
7719               {                                   /* kill       */
7720                 return -KEY_kill;
7721               }
7722
7723               goto unknown;
7724
7725             default:
7726               goto unknown;
7727           }
7728
7729         case 'l':
7730           switch (name[1])
7731           {
7732             case 'a':
7733               if (name[2] == 's' &&
7734                   name[3] == 't')
7735               {                                   /* last       */
7736                 return KEY_last;
7737               }
7738
7739               goto unknown;
7740
7741             case 'i':
7742               if (name[2] == 'n' &&
7743                   name[3] == 'k')
7744               {                                   /* link       */
7745                 return -KEY_link;
7746               }
7747
7748               goto unknown;
7749
7750             case 'o':
7751               if (name[2] == 'c' &&
7752                   name[3] == 'k')
7753               {                                   /* lock       */
7754                 return -KEY_lock;
7755               }
7756
7757               goto unknown;
7758
7759             default:
7760               goto unknown;
7761           }
7762
7763         case 'n':
7764           if (name[1] == 'e' &&
7765               name[2] == 'x' &&
7766               name[3] == 't')
7767           {                                       /* next       */
7768             return KEY_next;
7769           }
7770
7771           goto unknown;
7772
7773         case 'o':
7774           if (name[1] == 'p' &&
7775               name[2] == 'e' &&
7776               name[3] == 'n')
7777           {                                       /* open       */
7778             return -KEY_open;
7779           }
7780
7781           goto unknown;
7782
7783         case 'p':
7784           switch (name[1])
7785           {
7786             case 'a':
7787               if (name[2] == 'c' &&
7788                   name[3] == 'k')
7789               {                                   /* pack       */
7790                 return -KEY_pack;
7791               }
7792
7793               goto unknown;
7794
7795             case 'i':
7796               if (name[2] == 'p' &&
7797                   name[3] == 'e')
7798               {                                   /* pipe       */
7799                 return -KEY_pipe;
7800               }
7801
7802               goto unknown;
7803
7804             case 'u':
7805               if (name[2] == 's' &&
7806                   name[3] == 'h')
7807               {                                   /* push       */
7808                 return -KEY_push;
7809               }
7810
7811               goto unknown;
7812
7813             default:
7814               goto unknown;
7815           }
7816
7817         case 'r':
7818           switch (name[1])
7819           {
7820             case 'a':
7821               if (name[2] == 'n' &&
7822                   name[3] == 'd')
7823               {                                   /* rand       */
7824                 return -KEY_rand;
7825               }
7826
7827               goto unknown;
7828
7829             case 'e':
7830               switch (name[2])
7831               {
7832                 case 'a':
7833                   if (name[3] == 'd')
7834                   {                               /* read       */
7835                     return -KEY_read;
7836                   }
7837
7838                   goto unknown;
7839
7840                 case 'c':
7841                   if (name[3] == 'v')
7842                   {                               /* recv       */
7843                     return -KEY_recv;
7844                   }
7845
7846                   goto unknown;
7847
7848                 case 'd':
7849                   if (name[3] == 'o')
7850                   {                               /* redo       */
7851                     return KEY_redo;
7852                   }
7853
7854                   goto unknown;
7855
7856                 default:
7857                   goto unknown;
7858               }
7859
7860             default:
7861               goto unknown;
7862           }
7863
7864         case 's':
7865           switch (name[1])
7866           {
7867             case 'e':
7868               switch (name[2])
7869               {
7870                 case 'e':
7871                   if (name[3] == 'k')
7872                   {                               /* seek       */
7873                     return -KEY_seek;
7874                   }
7875
7876                   goto unknown;
7877
7878                 case 'n':
7879                   if (name[3] == 'd')
7880                   {                               /* send       */
7881                     return -KEY_send;
7882                   }
7883
7884                   goto unknown;
7885
7886                 default:
7887                   goto unknown;
7888               }
7889
7890             case 'o':
7891               if (name[2] == 'r' &&
7892                   name[3] == 't')
7893               {                                   /* sort       */
7894                 return KEY_sort;
7895               }
7896
7897               goto unknown;
7898
7899             case 'q':
7900               if (name[2] == 'r' &&
7901                   name[3] == 't')
7902               {                                   /* sqrt       */
7903                 return -KEY_sqrt;
7904               }
7905
7906               goto unknown;
7907
7908             case 't':
7909               if (name[2] == 'a' &&
7910                   name[3] == 't')
7911               {                                   /* stat       */
7912                 return -KEY_stat;
7913               }
7914
7915               goto unknown;
7916
7917             default:
7918               goto unknown;
7919           }
7920
7921         case 't':
7922           switch (name[1])
7923           {
7924             case 'e':
7925               if (name[2] == 'l' &&
7926                   name[3] == 'l')
7927               {                                   /* tell       */
7928                 return -KEY_tell;
7929               }
7930
7931               goto unknown;
7932
7933             case 'i':
7934               switch (name[2])
7935               {
7936                 case 'e':
7937                   if (name[3] == 'd')
7938                   {                               /* tied       */
7939                     return KEY_tied;
7940                   }
7941
7942                   goto unknown;
7943
7944                 case 'm':
7945                   if (name[3] == 'e')
7946                   {                               /* time       */
7947                     return -KEY_time;
7948                   }
7949
7950                   goto unknown;
7951
7952                 default:
7953                   goto unknown;
7954               }
7955
7956             default:
7957               goto unknown;
7958           }
7959
7960         case 'w':
7961           switch (name[1])
7962           {
7963             case 'a':
7964               switch (name[2])
7965               {
7966                 case 'i':
7967                   if (name[3] == 't')
7968                   {                               /* wait       */
7969                     return -KEY_wait;
7970                   }
7971
7972                   goto unknown;
7973
7974                 case 'r':
7975                   if (name[3] == 'n')
7976                   {                               /* warn       */
7977                     return -KEY_warn;
7978                   }
7979
7980                   goto unknown;
7981
7982                 default:
7983                   goto unknown;
7984               }
7985
7986             case 'h':
7987               if (name[2] == 'e' &&
7988                   name[3] == 'n')
7989               {                                   /* when       */
7990                 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_when : 0);
7991               }
7992
7993               goto unknown;
7994
7995             default:
7996               goto unknown;
7997           }
7998
7999         default:
8000           goto unknown;
8001       }
8002
8003     case 5: /* 39 tokens of length 5 */
8004       switch (name[0])
8005       {
8006         case 'B':
8007           if (name[1] == 'E' &&
8008               name[2] == 'G' &&
8009               name[3] == 'I' &&
8010               name[4] == 'N')
8011           {                                       /* BEGIN      */
8012             return KEY_BEGIN;
8013           }
8014
8015           goto unknown;
8016
8017         case 'C':
8018           if (name[1] == 'H' &&
8019               name[2] == 'E' &&
8020               name[3] == 'C' &&
8021               name[4] == 'K')
8022           {                                       /* CHECK      */
8023             return KEY_CHECK;
8024           }
8025
8026           goto unknown;
8027
8028         case 'a':
8029           switch (name[1])
8030           {
8031             case 'l':
8032               if (name[2] == 'a' &&
8033                   name[3] == 'r' &&
8034                   name[4] == 'm')
8035               {                                   /* alarm      */
8036                 return -KEY_alarm;
8037               }
8038
8039               goto unknown;
8040
8041             case 't':
8042               if (name[2] == 'a' &&
8043                   name[3] == 'n' &&
8044                   name[4] == '2')
8045               {                                   /* atan2      */
8046                 return -KEY_atan2;
8047               }
8048
8049               goto unknown;
8050
8051             default:
8052               goto unknown;
8053           }
8054
8055         case 'b':
8056           switch (name[1])
8057           {
8058             case 'l':
8059               if (name[2] == 'e' &&
8060                   name[3] == 's' &&
8061                   name[4] == 's')
8062               {                                   /* bless      */
8063                 return -KEY_bless;
8064               }
8065
8066               goto unknown;
8067
8068             case 'r':
8069               if (name[2] == 'e' &&
8070                   name[3] == 'a' &&
8071                   name[4] == 'k')
8072               {                                   /* break      */
8073                 return (all_keywords || FEATURE_IS_ENABLED("switch") ? -KEY_break : 0);
8074               }
8075
8076               goto unknown;
8077
8078             default:
8079               goto unknown;
8080           }
8081
8082         case 'c':
8083           switch (name[1])
8084           {
8085             case 'h':
8086               switch (name[2])
8087               {
8088                 case 'd':
8089                   if (name[3] == 'i' &&
8090                       name[4] == 'r')
8091                   {                               /* chdir      */
8092                     return -KEY_chdir;
8093                   }
8094
8095                   goto unknown;
8096
8097                 case 'm':
8098                   if (name[3] == 'o' &&
8099                       name[4] == 'd')
8100                   {                               /* chmod      */
8101                     return -KEY_chmod;
8102                   }
8103
8104                   goto unknown;
8105
8106                 case 'o':
8107                   switch (name[3])
8108                   {
8109                     case 'm':
8110                       if (name[4] == 'p')
8111                       {                           /* chomp      */
8112                         return -KEY_chomp;
8113                       }
8114
8115                       goto unknown;
8116
8117                     case 'w':
8118                       if (name[4] == 'n')
8119                       {                           /* chown      */
8120                         return -KEY_chown;
8121                       }
8122
8123                       goto unknown;
8124
8125                     default:
8126                       goto unknown;
8127                   }
8128
8129                 default:
8130                   goto unknown;
8131               }
8132
8133             case 'l':
8134               if (name[2] == 'o' &&
8135                   name[3] == 's' &&
8136                   name[4] == 'e')
8137               {                                   /* close      */
8138                 return -KEY_close;
8139               }
8140
8141               goto unknown;
8142
8143             case 'r':
8144               if (name[2] == 'y' &&
8145                   name[3] == 'p' &&
8146                   name[4] == 't')
8147               {                                   /* crypt      */
8148                 return -KEY_crypt;
8149               }
8150
8151               goto unknown;
8152
8153             default:
8154               goto unknown;
8155           }
8156
8157         case 'e':
8158           if (name[1] == 'l' &&
8159               name[2] == 's' &&
8160               name[3] == 'i' &&
8161               name[4] == 'f')
8162           {                                       /* elsif      */
8163             return KEY_elsif;
8164           }
8165
8166           goto unknown;
8167
8168         case 'f':
8169           switch (name[1])
8170           {
8171             case 'c':
8172               if (name[2] == 'n' &&
8173                   name[3] == 't' &&
8174                   name[4] == 'l')
8175               {                                   /* fcntl      */
8176                 return -KEY_fcntl;
8177               }
8178
8179               goto unknown;
8180
8181             case 'l':
8182               if (name[2] == 'o' &&
8183                   name[3] == 'c' &&
8184                   name[4] == 'k')
8185               {                                   /* flock      */
8186                 return -KEY_flock;
8187               }
8188
8189               goto unknown;
8190
8191             default:
8192               goto unknown;
8193           }
8194
8195         case 'g':
8196           if (name[1] == 'i' &&
8197               name[2] == 'v' &&
8198               name[3] == 'e' &&
8199               name[4] == 'n')
8200           {                                       /* given      */
8201             return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_given : 0);
8202           }
8203
8204           goto unknown;
8205
8206         case 'i':
8207           switch (name[1])
8208           {
8209             case 'n':
8210               if (name[2] == 'd' &&
8211                   name[3] == 'e' &&
8212                   name[4] == 'x')
8213               {                                   /* index      */
8214                 return -KEY_index;
8215               }
8216
8217               goto unknown;
8218
8219             case 'o':
8220               if (name[2] == 'c' &&
8221                   name[3] == 't' &&
8222                   name[4] == 'l')
8223               {                                   /* ioctl      */
8224                 return -KEY_ioctl;
8225               }
8226
8227               goto unknown;
8228
8229             default:
8230               goto unknown;
8231           }
8232
8233         case 'l':
8234           switch (name[1])
8235           {
8236             case 'o':
8237               if (name[2] == 'c' &&
8238                   name[3] == 'a' &&
8239                   name[4] == 'l')
8240               {                                   /* local      */
8241                 return KEY_local;
8242               }
8243
8244               goto unknown;
8245
8246             case 's':
8247               if (name[2] == 't' &&
8248                   name[3] == 'a' &&
8249                   name[4] == 't')
8250               {                                   /* lstat      */
8251                 return -KEY_lstat;
8252               }
8253
8254               goto unknown;
8255
8256             default:
8257               goto unknown;
8258           }
8259
8260         case 'm':
8261           if (name[1] == 'k' &&
8262               name[2] == 'd' &&
8263               name[3] == 'i' &&
8264               name[4] == 'r')
8265           {                                       /* mkdir      */
8266             return -KEY_mkdir;
8267           }
8268
8269           goto unknown;
8270
8271         case 'p':
8272           if (name[1] == 'r' &&
8273               name[2] == 'i' &&
8274               name[3] == 'n' &&
8275               name[4] == 't')
8276           {                                       /* print      */
8277             return KEY_print;
8278           }
8279
8280           goto unknown;
8281
8282         case 'r':
8283           switch (name[1])
8284           {
8285             case 'e':
8286               if (name[2] == 's' &&
8287                   name[3] == 'e' &&
8288                   name[4] == 't')
8289               {                                   /* reset      */
8290                 return -KEY_reset;
8291               }
8292
8293               goto unknown;
8294
8295             case 'm':
8296               if (name[2] == 'd' &&
8297                   name[3] == 'i' &&
8298                   name[4] == 'r')
8299               {                                   /* rmdir      */
8300                 return -KEY_rmdir;
8301               }
8302
8303               goto unknown;
8304
8305             default:
8306               goto unknown;
8307           }
8308
8309         case 's':
8310           switch (name[1])
8311           {
8312             case 'e':
8313               if (name[2] == 'm' &&
8314                   name[3] == 'o' &&
8315                   name[4] == 'p')
8316               {                                   /* semop      */
8317                 return -KEY_semop;
8318               }
8319
8320               goto unknown;
8321
8322             case 'h':
8323               if (name[2] == 'i' &&
8324                   name[3] == 'f' &&
8325                   name[4] == 't')
8326               {                                   /* shift      */
8327                 return -KEY_shift;
8328               }
8329
8330               goto unknown;
8331
8332             case 'l':
8333               if (name[2] == 'e' &&
8334                   name[3] == 'e' &&
8335                   name[4] == 'p')
8336               {                                   /* sleep      */
8337                 return -KEY_sleep;
8338               }
8339
8340               goto unknown;
8341
8342             case 'p':
8343               if (name[2] == 'l' &&
8344                   name[3] == 'i' &&
8345                   name[4] == 't')
8346               {                                   /* split      */
8347                 return KEY_split;
8348               }
8349
8350               goto unknown;
8351
8352             case 'r':
8353               if (name[2] == 'a' &&
8354                   name[3] == 'n' &&
8355                   name[4] == 'd')
8356               {                                   /* srand      */
8357                 return -KEY_srand;
8358               }
8359
8360               goto unknown;
8361
8362             case 't':
8363               switch (name[2])
8364               {
8365                 case 'a':
8366                   if (name[3] == 't' &&
8367                       name[4] == 'e')
8368                   {                               /* state      */
8369                     return (all_keywords || FEATURE_IS_ENABLED("state") ? KEY_state : 0);
8370                   }
8371
8372                   goto unknown;
8373
8374                 case 'u':
8375                   if (name[3] == 'd' &&
8376                       name[4] == 'y')
8377                   {                               /* study      */
8378                     return KEY_study;
8379                   }
8380
8381                   goto unknown;
8382
8383                 default:
8384                   goto unknown;
8385               }
8386
8387             default:
8388               goto unknown;
8389           }
8390
8391         case 't':
8392           if (name[1] == 'i' &&
8393               name[2] == 'm' &&
8394               name[3] == 'e' &&
8395               name[4] == 's')
8396           {                                       /* times      */
8397             return -KEY_times;
8398           }
8399
8400           goto unknown;
8401
8402         case 'u':
8403           switch (name[1])
8404           {
8405             case 'm':
8406               if (name[2] == 'a' &&
8407                   name[3] == 's' &&
8408                   name[4] == 'k')
8409               {                                   /* umask      */
8410                 return -KEY_umask;
8411               }
8412
8413               goto unknown;
8414
8415             case 'n':
8416               switch (name[2])
8417               {
8418                 case 'd':
8419                   if (name[3] == 'e' &&
8420                       name[4] == 'f')
8421                   {                               /* undef      */
8422                     return KEY_undef;
8423                   }
8424
8425                   goto unknown;
8426
8427                 case 't':
8428                   if (name[3] == 'i')
8429                   {
8430                     switch (name[4])
8431                     {
8432                       case 'e':
8433                         {                         /* untie      */
8434                           return KEY_untie;
8435                         }
8436
8437                       case 'l':
8438                         {                         /* until      */
8439                           return KEY_until;
8440                         }
8441
8442                       default:
8443                         goto unknown;
8444                     }
8445                   }
8446
8447                   goto unknown;
8448
8449                 default:
8450                   goto unknown;
8451               }
8452
8453             case 't':
8454               if (name[2] == 'i' &&
8455                   name[3] == 'm' &&
8456                   name[4] == 'e')
8457               {                                   /* utime      */
8458                 return -KEY_utime;
8459               }
8460
8461               goto unknown;
8462
8463             default:
8464               goto unknown;
8465           }
8466
8467         case 'w':
8468           switch (name[1])
8469           {
8470             case 'h':
8471               if (name[2] == 'i' &&
8472                   name[3] == 'l' &&
8473                   name[4] == 'e')
8474               {                                   /* while      */
8475                 return KEY_while;
8476               }
8477
8478               goto unknown;
8479
8480             case 'r':
8481               if (name[2] == 'i' &&
8482                   name[3] == 't' &&
8483                   name[4] == 'e')
8484               {                                   /* write      */
8485                 return -KEY_write;
8486               }
8487
8488               goto unknown;
8489
8490             default:
8491               goto unknown;
8492           }
8493
8494         default:
8495           goto unknown;
8496       }
8497
8498     case 6: /* 33 tokens of length 6 */
8499       switch (name[0])
8500       {
8501         case 'a':
8502           if (name[1] == 'c' &&
8503               name[2] == 'c' &&
8504               name[3] == 'e' &&
8505               name[4] == 'p' &&
8506               name[5] == 't')
8507           {                                       /* accept     */
8508             return -KEY_accept;
8509           }
8510
8511           goto unknown;
8512
8513         case 'c':
8514           switch (name[1])
8515           {
8516             case 'a':
8517               if (name[2] == 'l' &&
8518                   name[3] == 'l' &&
8519                   name[4] == 'e' &&
8520                   name[5] == 'r')
8521               {                                   /* caller     */
8522                 return -KEY_caller;
8523               }
8524
8525               goto unknown;
8526
8527             case 'h':
8528               if (name[2] == 'r' &&
8529                   name[3] == 'o' &&
8530                   name[4] == 'o' &&
8531                   name[5] == 't')
8532               {                                   /* chroot     */
8533                 return -KEY_chroot;
8534               }
8535
8536               goto unknown;
8537
8538             default:
8539               goto unknown;
8540           }
8541
8542         case 'd':
8543           if (name[1] == 'e' &&
8544               name[2] == 'l' &&
8545               name[3] == 'e' &&
8546               name[4] == 't' &&
8547               name[5] == 'e')
8548           {                                       /* delete     */
8549             return KEY_delete;
8550           }
8551
8552           goto unknown;
8553
8554         case 'e':
8555           switch (name[1])
8556           {
8557             case 'l':
8558               if (name[2] == 's' &&
8559                   name[3] == 'e' &&
8560                   name[4] == 'i' &&
8561                   name[5] == 'f')
8562               {                                   /* elseif     */
8563                 if(ckWARN_d(WARN_SYNTAX))
8564                   Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif");
8565               }
8566
8567               goto unknown;
8568
8569             case 'x':
8570               if (name[2] == 'i' &&
8571                   name[3] == 's' &&
8572                   name[4] == 't' &&
8573                   name[5] == 's')
8574               {                                   /* exists     */
8575                 return KEY_exists;
8576               }
8577
8578               goto unknown;
8579
8580             default:
8581               goto unknown;
8582           }
8583
8584         case 'f':
8585           switch (name[1])
8586           {
8587             case 'i':
8588               if (name[2] == 'l' &&
8589                   name[3] == 'e' &&
8590                   name[4] == 'n' &&
8591                   name[5] == 'o')
8592               {                                   /* fileno     */
8593                 return -KEY_fileno;
8594               }
8595
8596               goto unknown;
8597
8598             case 'o':
8599               if (name[2] == 'r' &&
8600                   name[3] == 'm' &&
8601                   name[4] == 'a' &&
8602                   name[5] == 't')
8603               {                                   /* format     */
8604                 return KEY_format;
8605               }
8606
8607               goto unknown;
8608
8609             default:
8610               goto unknown;
8611           }
8612
8613         case 'g':
8614           if (name[1] == 'm' &&
8615               name[2] == 't' &&
8616               name[3] == 'i' &&
8617               name[4] == 'm' &&
8618               name[5] == 'e')
8619           {                                       /* gmtime     */
8620             return -KEY_gmtime;
8621           }
8622
8623           goto unknown;
8624
8625         case 'l':
8626           switch (name[1])
8627           {
8628             case 'e':
8629               if (name[2] == 'n' &&
8630                   name[3] == 'g' &&
8631                   name[4] == 't' &&
8632                   name[5] == 'h')
8633               {                                   /* length     */
8634                 return -KEY_length;
8635               }
8636
8637               goto unknown;
8638
8639             case 'i':
8640               if (name[2] == 's' &&
8641                   name[3] == 't' &&
8642                   name[4] == 'e' &&
8643                   name[5] == 'n')
8644               {                                   /* listen     */
8645                 return -KEY_listen;
8646               }
8647
8648               goto unknown;
8649
8650             default:
8651               goto unknown;
8652           }
8653
8654         case 'm':
8655           if (name[1] == 's' &&
8656               name[2] == 'g')
8657           {
8658             switch (name[3])
8659             {
8660               case 'c':
8661                 if (name[4] == 't' &&
8662                     name[5] == 'l')
8663                 {                                 /* msgctl     */
8664                   return -KEY_msgctl;
8665                 }
8666
8667                 goto unknown;
8668
8669               case 'g':
8670                 if (name[4] == 'e' &&
8671                     name[5] == 't')
8672                 {                                 /* msgget     */
8673                   return -KEY_msgget;
8674                 }
8675
8676                 goto unknown;
8677
8678               case 'r':
8679                 if (name[4] == 'c' &&
8680                     name[5] == 'v')
8681                 {                                 /* msgrcv     */
8682                   return -KEY_msgrcv;
8683                 }
8684
8685                 goto unknown;
8686
8687               case 's':
8688                 if (name[4] == 'n' &&
8689                     name[5] == 'd')
8690                 {                                 /* msgsnd     */
8691                   return -KEY_msgsnd;
8692                 }
8693
8694                 goto unknown;
8695
8696               default:
8697                 goto unknown;
8698             }
8699           }
8700
8701           goto unknown;
8702
8703         case 'p':
8704           if (name[1] == 'r' &&
8705               name[2] == 'i' &&
8706               name[3] == 'n' &&
8707               name[4] == 't' &&
8708               name[5] == 'f')
8709           {                                       /* printf     */
8710             return KEY_printf;
8711           }
8712
8713           goto unknown;
8714
8715         case 'r':
8716           switch (name[1])
8717           {
8718             case 'e':
8719               switch (name[2])
8720               {
8721                 case 'n':
8722                   if (name[3] == 'a' &&
8723                       name[4] == 'm' &&
8724                       name[5] == 'e')
8725                   {                               /* rename     */
8726                     return -KEY_rename;
8727                   }
8728
8729                   goto unknown;
8730
8731                 case 't':
8732                   if (name[3] == 'u' &&
8733                       name[4] == 'r' &&
8734                       name[5] == 'n')
8735                   {                               /* return     */
8736                     return KEY_return;
8737                   }
8738
8739                   goto unknown;
8740
8741                 default:
8742                   goto unknown;
8743               }
8744
8745             case 'i':
8746               if (name[2] == 'n' &&
8747                   name[3] == 'd' &&
8748                   name[4] == 'e' &&
8749                   name[5] == 'x')
8750               {                                   /* rindex     */
8751                 return -KEY_rindex;
8752               }
8753
8754               goto unknown;
8755
8756             default:
8757               goto unknown;
8758           }
8759
8760         case 's':
8761           switch (name[1])
8762           {
8763             case 'c':
8764               if (name[2] == 'a' &&
8765                   name[3] == 'l' &&
8766                   name[4] == 'a' &&
8767                   name[5] == 'r')
8768               {                                   /* scalar     */
8769                 return KEY_scalar;
8770               }
8771
8772               goto unknown;
8773
8774             case 'e':
8775               switch (name[2])
8776               {
8777                 case 'l':
8778                   if (name[3] == 'e' &&
8779                       name[4] == 'c' &&
8780                       name[5] == 't')
8781                   {                               /* select     */
8782                     return -KEY_select;
8783                   }
8784
8785                   goto unknown;
8786
8787                 case 'm':
8788                   switch (name[3])
8789                   {
8790                     case 'c':
8791                       if (name[4] == 't' &&
8792                           name[5] == 'l')
8793                       {                           /* semctl     */
8794                         return -KEY_semctl;
8795                       }
8796
8797                       goto unknown;
8798
8799                     case 'g':
8800                       if (name[4] == 'e' &&
8801                           name[5] == 't')
8802                       {                           /* semget     */
8803                         return -KEY_semget;
8804                       }
8805
8806                       goto unknown;
8807
8808                     default:
8809                       goto unknown;
8810                   }
8811
8812                 default:
8813                   goto unknown;
8814               }
8815
8816             case 'h':
8817               if (name[2] == 'm')
8818               {
8819                 switch (name[3])
8820                 {
8821                   case 'c':
8822                     if (name[4] == 't' &&
8823                         name[5] == 'l')
8824                     {                             /* shmctl     */
8825                       return -KEY_shmctl;
8826                     }
8827
8828                     goto unknown;
8829
8830                   case 'g':
8831                     if (name[4] == 'e' &&
8832                         name[5] == 't')
8833                     {                             /* shmget     */
8834                       return -KEY_shmget;
8835                     }
8836
8837                     goto unknown;
8838
8839                   default:
8840                     goto unknown;
8841                 }
8842               }
8843
8844               goto unknown;
8845
8846             case 'o':
8847               if (name[2] == 'c' &&
8848                   name[3] == 'k' &&
8849                   name[4] == 'e' &&
8850                   name[5] == 't')
8851               {                                   /* socket     */
8852                 return -KEY_socket;
8853               }
8854
8855               goto unknown;
8856
8857             case 'p':
8858               if (name[2] == 'l' &&
8859                   name[3] == 'i' &&
8860                   name[4] == 'c' &&
8861                   name[5] == 'e')
8862               {                                   /* splice     */
8863                 return -KEY_splice;
8864               }
8865
8866               goto unknown;
8867
8868             case 'u':
8869               if (name[2] == 'b' &&
8870                   name[3] == 's' &&
8871                   name[4] == 't' &&
8872                   name[5] == 'r')
8873               {                                   /* substr     */
8874                 return -KEY_substr;
8875               }
8876
8877               goto unknown;
8878
8879             case 'y':
8880               if (name[2] == 's' &&
8881                   name[3] == 't' &&
8882                   name[4] == 'e' &&
8883                   name[5] == 'm')
8884               {                                   /* system     */
8885                 return -KEY_system;
8886               }
8887
8888               goto unknown;
8889
8890             default:
8891               goto unknown;
8892           }
8893
8894         case 'u':
8895           if (name[1] == 'n')
8896           {
8897             switch (name[2])
8898             {
8899               case 'l':
8900                 switch (name[3])
8901                 {
8902                   case 'e':
8903                     if (name[4] == 's' &&
8904                         name[5] == 's')
8905                     {                             /* unless     */
8906                       return KEY_unless;
8907                     }
8908
8909                     goto unknown;
8910
8911                   case 'i':
8912                     if (name[4] == 'n' &&
8913                         name[5] == 'k')
8914                     {                             /* unlink     */
8915                       return -KEY_unlink;
8916                     }
8917
8918                     goto unknown;
8919
8920                   default:
8921                     goto unknown;
8922                 }
8923
8924               case 'p':
8925                 if (name[3] == 'a' &&
8926                     name[4] == 'c' &&
8927                     name[5] == 'k')
8928                 {                                 /* unpack     */
8929                   return -KEY_unpack;
8930                 }
8931
8932                 goto unknown;
8933
8934               default:
8935                 goto unknown;
8936             }
8937           }
8938
8939           goto unknown;
8940
8941         case 'v':
8942           if (name[1] == 'a' &&
8943               name[2] == 'l' &&
8944               name[3] == 'u' &&
8945               name[4] == 'e' &&
8946               name[5] == 's')
8947           {                                       /* values     */
8948             return -KEY_values;
8949           }
8950
8951           goto unknown;
8952
8953         default:
8954           goto unknown;
8955       }
8956
8957     case 7: /* 29 tokens of length 7 */
8958       switch (name[0])
8959       {
8960         case 'D':
8961           if (name[1] == 'E' &&
8962               name[2] == 'S' &&
8963               name[3] == 'T' &&
8964               name[4] == 'R' &&
8965               name[5] == 'O' &&
8966               name[6] == 'Y')
8967           {                                       /* DESTROY    */
8968             return KEY_DESTROY;
8969           }
8970
8971           goto unknown;
8972
8973         case '_':
8974           if (name[1] == '_' &&
8975               name[2] == 'E' &&
8976               name[3] == 'N' &&
8977               name[4] == 'D' &&
8978               name[5] == '_' &&
8979               name[6] == '_')
8980           {                                       /* __END__    */
8981             return KEY___END__;
8982           }
8983
8984           goto unknown;
8985
8986         case 'b':
8987           if (name[1] == 'i' &&
8988               name[2] == 'n' &&
8989               name[3] == 'm' &&
8990               name[4] == 'o' &&
8991               name[5] == 'd' &&
8992               name[6] == 'e')
8993           {                                       /* binmode    */
8994             return -KEY_binmode;
8995           }
8996
8997           goto unknown;
8998
8999         case 'c':
9000           if (name[1] == 'o' &&
9001               name[2] == 'n' &&
9002               name[3] == 'n' &&
9003               name[4] == 'e' &&
9004               name[5] == 'c' &&
9005               name[6] == 't')
9006           {                                       /* connect    */
9007             return -KEY_connect;
9008           }
9009
9010           goto unknown;
9011
9012         case 'd':
9013           switch (name[1])
9014           {
9015             case 'b':
9016               if (name[2] == 'm' &&
9017                   name[3] == 'o' &&
9018                   name[4] == 'p' &&
9019                   name[5] == 'e' &&
9020                   name[6] == 'n')
9021               {                                   /* dbmopen    */
9022                 return -KEY_dbmopen;
9023               }
9024
9025               goto unknown;
9026
9027             case 'e':
9028               if (name[2] == 'f')
9029               {
9030                 switch (name[3])
9031                 {
9032                   case 'a':
9033                     if (name[4] == 'u' &&
9034                         name[5] == 'l' &&
9035                         name[6] == 't')
9036                     {                             /* default    */
9037                       return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_default : 0);
9038                     }
9039
9040                     goto unknown;
9041
9042                   case 'i':
9043                     if (name[4] == 'n' &&
9044                         name[5] == 'e' &&
9045                         name[6] == 'd')
9046                     {                             /* defined    */
9047                       return KEY_defined;
9048                     }
9049
9050                     goto unknown;
9051
9052                   default:
9053                     goto unknown;
9054                 }
9055               }
9056
9057               goto unknown;
9058
9059             default:
9060               goto unknown;
9061           }
9062
9063         case 'f':
9064           if (name[1] == 'o' &&
9065               name[2] == 'r' &&
9066               name[3] == 'e' &&
9067               name[4] == 'a' &&
9068               name[5] == 'c' &&
9069               name[6] == 'h')
9070           {                                       /* foreach    */
9071             return KEY_foreach;
9072           }
9073
9074           goto unknown;
9075
9076         case 'g':
9077           if (name[1] == 'e' &&
9078               name[2] == 't' &&
9079               name[3] == 'p')
9080           {
9081             switch (name[4])
9082             {
9083               case 'g':
9084                 if (name[5] == 'r' &&
9085                     name[6] == 'p')
9086                 {                                 /* getpgrp    */
9087                   return -KEY_getpgrp;
9088                 }
9089
9090                 goto unknown;
9091
9092               case 'p':
9093                 if (name[5] == 'i' &&
9094                     name[6] == 'd')
9095                 {                                 /* getppid    */
9096                   return -KEY_getppid;
9097                 }
9098
9099                 goto unknown;
9100
9101               default:
9102                 goto unknown;
9103             }
9104           }
9105
9106           goto unknown;
9107
9108         case 'l':
9109           if (name[1] == 'c' &&
9110               name[2] == 'f' &&
9111               name[3] == 'i' &&
9112               name[4] == 'r' &&
9113               name[5] == 's' &&
9114               name[6] == 't')
9115           {                                       /* lcfirst    */
9116             return -KEY_lcfirst;
9117           }
9118
9119           goto unknown;
9120
9121         case 'o':
9122           if (name[1] == 'p' &&
9123               name[2] == 'e' &&
9124               name[3] == 'n' &&
9125               name[4] == 'd' &&
9126               name[5] == 'i' &&
9127               name[6] == 'r')
9128           {                                       /* opendir    */
9129             return -KEY_opendir;
9130           }
9131
9132           goto unknown;
9133
9134         case 'p':
9135           if (name[1] == 'a' &&
9136               name[2] == 'c' &&
9137               name[3] == 'k' &&
9138               name[4] == 'a' &&
9139               name[5] == 'g' &&
9140               name[6] == 'e')
9141           {                                       /* package    */
9142             return KEY_package;
9143           }
9144
9145           goto unknown;
9146
9147         case 'r':
9148           if (name[1] == 'e')
9149           {
9150             switch (name[2])
9151             {
9152               case 'a':
9153                 if (name[3] == 'd' &&
9154                     name[4] == 'd' &&
9155                     name[5] == 'i' &&
9156                     name[6] == 'r')
9157                 {                                 /* readdir    */
9158                   return -KEY_readdir;
9159                 }
9160
9161                 goto unknown;
9162
9163               case 'q':
9164                 if (name[3] == 'u' &&
9165                     name[4] == 'i' &&
9166                     name[5] == 'r' &&
9167                     name[6] == 'e')
9168                 {                                 /* require    */
9169                   return KEY_require;
9170                 }
9171
9172                 goto unknown;
9173
9174               case 'v':
9175                 if (name[3] == 'e' &&
9176                     name[4] == 'r' &&
9177                     name[5] == 's' &&
9178                     name[6] == 'e')
9179                 {                                 /* reverse    */
9180                   return -KEY_reverse;
9181                 }
9182
9183                 goto unknown;
9184
9185               default:
9186                 goto unknown;
9187             }
9188           }
9189
9190           goto unknown;
9191
9192         case 's':
9193           switch (name[1])
9194           {
9195             case 'e':
9196               switch (name[2])
9197               {
9198                 case 'e':
9199                   if (name[3] == 'k' &&
9200                       name[4] == 'd' &&
9201                       name[5] == 'i' &&
9202                       name[6] == 'r')
9203                   {                               /* seekdir    */
9204                     return -KEY_seekdir;
9205                   }
9206
9207                   goto unknown;
9208
9209                 case 't':
9210                   if (name[3] == 'p' &&
9211                       name[4] == 'g' &&
9212                       name[5] == 'r' &&
9213                       name[6] == 'p')
9214                   {                               /* setpgrp    */
9215                     return -KEY_setpgrp;
9216                   }
9217
9218                   goto unknown;
9219
9220                 default:
9221                   goto unknown;
9222               }
9223
9224             case 'h':
9225               if (name[2] == 'm' &&
9226                   name[3] == 'r' &&
9227                   name[4] == 'e' &&
9228                   name[5] == 'a' &&
9229                   name[6] == 'd')
9230               {                                   /* shmread    */
9231                 return -KEY_shmread;
9232               }
9233
9234               goto unknown;
9235
9236             case 'p':
9237               if (name[2] == 'r' &&
9238                   name[3] == 'i' &&
9239                   name[4] == 'n' &&
9240                   name[5] == 't' &&
9241                   name[6] == 'f')
9242               {                                   /* sprintf    */
9243                 return -KEY_sprintf;
9244               }
9245
9246               goto unknown;
9247
9248             case 'y':
9249               switch (name[2])
9250               {
9251                 case 'm':
9252                   if (name[3] == 'l' &&
9253                       name[4] == 'i' &&
9254                       name[5] == 'n' &&
9255                       name[6] == 'k')
9256                   {                               /* symlink    */
9257                     return -KEY_symlink;
9258                   }
9259
9260                   goto unknown;
9261
9262                 case 's':
9263                   switch (name[3])
9264                   {
9265                     case 'c':
9266                       if (name[4] == 'a' &&
9267                           name[5] == 'l' &&
9268                           name[6] == 'l')
9269                       {                           /* syscall    */
9270                         return -KEY_syscall;
9271                       }
9272
9273                       goto unknown;
9274
9275                     case 'o':
9276                       if (name[4] == 'p' &&
9277                           name[5] == 'e' &&
9278                           name[6] == 'n')
9279                       {                           /* sysopen    */
9280                         return -KEY_sysopen;
9281                       }
9282
9283                       goto unknown;
9284
9285                     case 'r':
9286                       if (name[4] == 'e' &&
9287                           name[5] == 'a' &&
9288                           name[6] == 'd')
9289                       {                           /* sysread    */
9290                         return -KEY_sysread;
9291                       }
9292
9293                       goto unknown;
9294
9295                     case 's':
9296                       if (name[4] == 'e' &&
9297                           name[5] == 'e' &&
9298                           name[6] == 'k')
9299                       {                           /* sysseek    */
9300                         return -KEY_sysseek;
9301                       }
9302
9303                       goto unknown;
9304
9305                     default:
9306                       goto unknown;
9307                   }
9308
9309                 default:
9310                   goto unknown;
9311               }
9312
9313             default:
9314               goto unknown;
9315           }
9316
9317         case 't':
9318           if (name[1] == 'e' &&
9319               name[2] == 'l' &&
9320               name[3] == 'l' &&
9321               name[4] == 'd' &&
9322               name[5] == 'i' &&
9323               name[6] == 'r')
9324           {                                       /* telldir    */
9325             return -KEY_telldir;
9326           }
9327
9328           goto unknown;
9329
9330         case 'u':
9331           switch (name[1])
9332           {
9333             case 'c':
9334               if (name[2] == 'f' &&
9335                   name[3] == 'i' &&
9336                   name[4] == 'r' &&
9337                   name[5] == 's' &&
9338                   name[6] == 't')
9339               {                                   /* ucfirst    */
9340                 return -KEY_ucfirst;
9341               }
9342
9343               goto unknown;
9344
9345             case 'n':
9346               if (name[2] == 's' &&
9347                   name[3] == 'h' &&
9348                   name[4] == 'i' &&
9349                   name[5] == 'f' &&
9350                   name[6] == 't')
9351               {                                   /* unshift    */
9352                 return -KEY_unshift;
9353               }
9354
9355               goto unknown;
9356
9357             default:
9358               goto unknown;
9359           }
9360
9361         case 'w':
9362           if (name[1] == 'a' &&
9363               name[2] == 'i' &&
9364               name[3] == 't' &&
9365               name[4] == 'p' &&
9366               name[5] == 'i' &&
9367               name[6] == 'd')
9368           {                                       /* waitpid    */
9369             return -KEY_waitpid;
9370           }
9371
9372           goto unknown;
9373
9374         default:
9375           goto unknown;
9376       }
9377
9378     case 8: /* 26 tokens of length 8 */
9379       switch (name[0])
9380       {
9381         case 'A':
9382           if (name[1] == 'U' &&
9383               name[2] == 'T' &&
9384               name[3] == 'O' &&
9385               name[4] == 'L' &&
9386               name[5] == 'O' &&
9387               name[6] == 'A' &&
9388               name[7] == 'D')
9389           {                                       /* AUTOLOAD   */
9390             return KEY_AUTOLOAD;
9391           }
9392
9393           goto unknown;
9394
9395         case '_':
9396           if (name[1] == '_')
9397           {
9398             switch (name[2])
9399             {
9400               case 'D':
9401                 if (name[3] == 'A' &&
9402                     name[4] == 'T' &&
9403                     name[5] == 'A' &&
9404                     name[6] == '_' &&
9405                     name[7] == '_')
9406                 {                                 /* __DATA__   */
9407                   return KEY___DATA__;
9408                 }
9409
9410                 goto unknown;
9411
9412               case 'F':
9413                 if (name[3] == 'I' &&
9414                     name[4] == 'L' &&
9415                     name[5] == 'E' &&
9416                     name[6] == '_' &&
9417                     name[7] == '_')
9418                 {                                 /* __FILE__   */
9419                   return -KEY___FILE__;
9420                 }
9421
9422                 goto unknown;
9423
9424               case 'L':
9425                 if (name[3] == 'I' &&
9426                     name[4] == 'N' &&
9427                     name[5] == 'E' &&
9428                     name[6] == '_' &&
9429                     name[7] == '_')
9430                 {                                 /* __LINE__   */
9431                   return -KEY___LINE__;
9432                 }
9433
9434                 goto unknown;
9435
9436               default:
9437                 goto unknown;
9438             }
9439           }
9440
9441           goto unknown;
9442
9443         case 'c':
9444           switch (name[1])
9445           {
9446             case 'l':
9447               if (name[2] == 'o' &&
9448                   name[3] == 's' &&
9449                   name[4] == 'e' &&
9450                   name[5] == 'd' &&
9451                   name[6] == 'i' &&
9452                   name[7] == 'r')
9453               {                                   /* closedir   */
9454                 return -KEY_closedir;
9455               }
9456
9457               goto unknown;
9458
9459             case 'o':
9460               if (name[2] == 'n' &&
9461                   name[3] == 't' &&
9462                   name[4] == 'i' &&
9463                   name[5] == 'n' &&
9464                   name[6] == 'u' &&
9465                   name[7] == 'e')
9466               {                                   /* continue   */
9467                 return -KEY_continue;
9468               }
9469
9470               goto unknown;
9471
9472             default:
9473               goto unknown;
9474           }
9475
9476         case 'd':
9477           if (name[1] == 'b' &&
9478               name[2] == 'm' &&
9479               name[3] == 'c' &&
9480               name[4] == 'l' &&
9481               name[5] == 'o' &&
9482               name[6] == 's' &&
9483               name[7] == 'e')
9484           {                                       /* dbmclose   */
9485             return -KEY_dbmclose;
9486           }
9487
9488           goto unknown;
9489
9490         case 'e':
9491           if (name[1] == 'n' &&
9492               name[2] == 'd')
9493           {
9494             switch (name[3])
9495             {
9496               case 'g':
9497                 if (name[4] == 'r' &&
9498                     name[5] == 'e' &&
9499                     name[6] == 'n' &&
9500                     name[7] == 't')
9501                 {                                 /* endgrent   */
9502                   return -KEY_endgrent;
9503                 }
9504
9505                 goto unknown;
9506
9507               case 'p':
9508                 if (name[4] == 'w' &&
9509                     name[5] == 'e' &&
9510                     name[6] == 'n' &&
9511                     name[7] == 't')
9512                 {                                 /* endpwent   */
9513                   return -KEY_endpwent;
9514                 }
9515
9516                 goto unknown;
9517
9518               default:
9519                 goto unknown;
9520             }
9521           }
9522
9523           goto unknown;
9524
9525         case 'f':
9526           if (name[1] == 'o' &&
9527               name[2] == 'r' &&
9528               name[3] == 'm' &&
9529               name[4] == 'l' &&
9530               name[5] == 'i' &&
9531               name[6] == 'n' &&
9532               name[7] == 'e')
9533           {                                       /* formline   */
9534             return -KEY_formline;
9535           }
9536
9537           goto unknown;
9538
9539         case 'g':
9540           if (name[1] == 'e' &&
9541               name[2] == 't')
9542           {
9543             switch (name[3])
9544             {
9545               case 'g':
9546                 if (name[4] == 'r')
9547                 {
9548                   switch (name[5])
9549                   {
9550                     case 'e':
9551                       if (name[6] == 'n' &&
9552                           name[7] == 't')
9553                       {                           /* getgrent   */
9554                         return -KEY_getgrent;
9555                       }
9556
9557                       goto unknown;
9558
9559                     case 'g':
9560                       if (name[6] == 'i' &&
9561                           name[7] == 'd')
9562                       {                           /* getgrgid   */
9563                         return -KEY_getgrgid;
9564                       }
9565
9566                       goto unknown;
9567
9568                     case 'n':
9569                       if (name[6] == 'a' &&
9570                           name[7] == 'm')
9571                       {                           /* getgrnam   */
9572                         return -KEY_getgrnam;
9573                       }
9574
9575                       goto unknown;
9576
9577                     default:
9578                       goto unknown;
9579                   }
9580                 }
9581
9582                 goto unknown;
9583
9584               case 'l':
9585                 if (name[4] == 'o' &&
9586                     name[5] == 'g' &&
9587                     name[6] == 'i' &&
9588                     name[7] == 'n')
9589                 {                                 /* getlogin   */
9590                   return -KEY_getlogin;
9591                 }
9592
9593                 goto unknown;
9594
9595               case 'p':
9596                 if (name[4] == 'w')
9597                 {
9598                   switch (name[5])
9599                   {
9600                     case 'e':
9601                       if (name[6] == 'n' &&
9602                           name[7] == 't')
9603                       {                           /* getpwent   */
9604                         return -KEY_getpwent;
9605                       }
9606
9607                       goto unknown;
9608
9609                     case 'n':
9610                       if (name[6] == 'a' &&
9611                           name[7] == 'm')
9612                       {                           /* getpwnam   */
9613                         return -KEY_getpwnam;
9614                       }
9615
9616                       goto unknown;
9617
9618                     case 'u':
9619                       if (name[6] == 'i' &&
9620                           name[7] == 'd')
9621                       {                           /* getpwuid   */
9622                         return -KEY_getpwuid;
9623                       }
9624
9625                       goto unknown;
9626
9627                     default:
9628                       goto unknown;
9629                   }
9630                 }
9631
9632                 goto unknown;
9633
9634               default:
9635                 goto unknown;
9636             }
9637           }
9638
9639           goto unknown;
9640
9641         case 'r':
9642           if (name[1] == 'e' &&
9643               name[2] == 'a' &&
9644               name[3] == 'd')
9645           {
9646             switch (name[4])
9647             {
9648               case 'l':
9649                 if (name[5] == 'i' &&
9650                     name[6] == 'n')
9651                 {
9652                   switch (name[7])
9653                   {
9654                     case 'e':
9655                       {                           /* readline   */
9656                         return -KEY_readline;
9657                       }
9658
9659                     case 'k':
9660                       {                           /* readlink   */
9661                         return -KEY_readlink;
9662                       }
9663
9664                     default:
9665                       goto unknown;
9666                   }
9667                 }
9668
9669                 goto unknown;
9670
9671               case 'p':
9672                 if (name[5] == 'i' &&
9673                     name[6] == 'p' &&
9674                     name[7] == 'e')
9675                 {                                 /* readpipe   */
9676                   return -KEY_readpipe;
9677                 }
9678
9679                 goto unknown;
9680
9681               default:
9682                 goto unknown;
9683             }
9684           }
9685
9686           goto unknown;
9687
9688         case 's':
9689           switch (name[1])
9690           {
9691             case 'e':
9692               if (name[2] == 't')
9693               {
9694                 switch (name[3])
9695                 {
9696                   case 'g':
9697                     if (name[4] == 'r' &&
9698                         name[5] == 'e' &&
9699                         name[6] == 'n' &&
9700                         name[7] == 't')
9701                     {                             /* setgrent   */
9702                       return -KEY_setgrent;
9703                     }
9704
9705                     goto unknown;
9706
9707                   case 'p':
9708                     if (name[4] == 'w' &&
9709                         name[5] == 'e' &&
9710                         name[6] == 'n' &&
9711                         name[7] == 't')
9712                     {                             /* setpwent   */
9713                       return -KEY_setpwent;
9714                     }
9715
9716                     goto unknown;
9717
9718                   default:
9719                     goto unknown;
9720                 }
9721               }
9722
9723               goto unknown;
9724
9725             case 'h':
9726               switch (name[2])
9727               {
9728                 case 'm':
9729                   if (name[3] == 'w' &&
9730                       name[4] == 'r' &&
9731                       name[5] == 'i' &&
9732                       name[6] == 't' &&
9733                       name[7] == 'e')
9734                   {                               /* shmwrite   */
9735                     return -KEY_shmwrite;
9736                   }
9737
9738                   goto unknown;
9739
9740                 case 'u':
9741                   if (name[3] == 't' &&
9742                       name[4] == 'd' &&
9743                       name[5] == 'o' &&
9744                       name[6] == 'w' &&
9745                       name[7] == 'n')
9746                   {                               /* shutdown   */
9747                     return -KEY_shutdown;
9748                   }
9749
9750                   goto unknown;
9751
9752                 default:
9753                   goto unknown;
9754               }
9755
9756             case 'y':
9757               if (name[2] == 's' &&
9758                   name[3] == 'w' &&
9759                   name[4] == 'r' &&
9760                   name[5] == 'i' &&
9761                   name[6] == 't' &&
9762                   name[7] == 'e')
9763               {                                   /* syswrite   */
9764                 return -KEY_syswrite;
9765               }
9766
9767               goto unknown;
9768
9769             default:
9770               goto unknown;
9771           }
9772
9773         case 't':
9774           if (name[1] == 'r' &&
9775               name[2] == 'u' &&
9776               name[3] == 'n' &&
9777               name[4] == 'c' &&
9778               name[5] == 'a' &&
9779               name[6] == 't' &&
9780               name[7] == 'e')
9781           {                                       /* truncate   */
9782             return -KEY_truncate;
9783           }
9784
9785           goto unknown;
9786
9787         default:
9788           goto unknown;
9789       }
9790
9791     case 9: /* 9 tokens of length 9 */
9792       switch (name[0])
9793       {
9794         case 'U':
9795           if (name[1] == 'N' &&
9796               name[2] == 'I' &&
9797               name[3] == 'T' &&
9798               name[4] == 'C' &&
9799               name[5] == 'H' &&
9800               name[6] == 'E' &&
9801               name[7] == 'C' &&
9802               name[8] == 'K')
9803           {                                       /* UNITCHECK  */
9804             return KEY_UNITCHECK;
9805           }
9806
9807           goto unknown;
9808
9809         case 'e':
9810           if (name[1] == 'n' &&
9811               name[2] == 'd' &&
9812               name[3] == 'n' &&
9813               name[4] == 'e' &&
9814               name[5] == 't' &&
9815               name[6] == 'e' &&
9816               name[7] == 'n' &&
9817               name[8] == 't')
9818           {                                       /* endnetent  */
9819             return -KEY_endnetent;
9820           }
9821
9822           goto unknown;
9823
9824         case 'g':
9825           if (name[1] == 'e' &&
9826               name[2] == 't' &&
9827               name[3] == 'n' &&
9828               name[4] == 'e' &&
9829               name[5] == 't' &&
9830               name[6] == 'e' &&
9831               name[7] == 'n' &&
9832               name[8] == 't')
9833           {                                       /* getnetent  */
9834             return -KEY_getnetent;
9835           }
9836
9837           goto unknown;
9838
9839         case 'l':
9840           if (name[1] == 'o' &&
9841               name[2] == 'c' &&
9842               name[3] == 'a' &&
9843               name[4] == 'l' &&
9844               name[5] == 't' &&
9845               name[6] == 'i' &&
9846               name[7] == 'm' &&
9847               name[8] == 'e')
9848           {                                       /* localtime  */
9849             return -KEY_localtime;
9850           }
9851
9852           goto unknown;
9853
9854         case 'p':
9855           if (name[1] == 'r' &&
9856               name[2] == 'o' &&
9857               name[3] == 't' &&
9858               name[4] == 'o' &&
9859               name[5] == 't' &&
9860               name[6] == 'y' &&
9861               name[7] == 'p' &&
9862               name[8] == 'e')
9863           {                                       /* prototype  */
9864             return KEY_prototype;
9865           }
9866
9867           goto unknown;
9868
9869         case 'q':
9870           if (name[1] == 'u' &&
9871               name[2] == 'o' &&
9872               name[3] == 't' &&
9873               name[4] == 'e' &&
9874               name[5] == 'm' &&
9875               name[6] == 'e' &&
9876               name[7] == 't' &&
9877               name[8] == 'a')
9878           {                                       /* quotemeta  */
9879             return -KEY_quotemeta;
9880           }
9881
9882           goto unknown;
9883
9884         case 'r':
9885           if (name[1] == 'e' &&
9886               name[2] == 'w' &&
9887               name[3] == 'i' &&
9888               name[4] == 'n' &&
9889               name[5] == 'd' &&
9890               name[6] == 'd' &&
9891               name[7] == 'i' &&
9892               name[8] == 'r')
9893           {                                       /* rewinddir  */
9894             return -KEY_rewinddir;
9895           }
9896
9897           goto unknown;
9898
9899         case 's':
9900           if (name[1] == 'e' &&
9901               name[2] == 't' &&
9902               name[3] == 'n' &&
9903               name[4] == 'e' &&
9904               name[5] == 't' &&
9905               name[6] == 'e' &&
9906               name[7] == 'n' &&
9907               name[8] == 't')
9908           {                                       /* setnetent  */
9909             return -KEY_setnetent;
9910           }
9911
9912           goto unknown;
9913
9914         case 'w':
9915           if (name[1] == 'a' &&
9916               name[2] == 'n' &&
9917               name[3] == 't' &&
9918               name[4] == 'a' &&
9919               name[5] == 'r' &&
9920               name[6] == 'r' &&
9921               name[7] == 'a' &&
9922               name[8] == 'y')
9923           {                                       /* wantarray  */
9924             return -KEY_wantarray;
9925           }
9926
9927           goto unknown;
9928
9929         default:
9930           goto unknown;
9931       }
9932
9933     case 10: /* 9 tokens of length 10 */
9934       switch (name[0])
9935       {
9936         case 'e':
9937           if (name[1] == 'n' &&
9938               name[2] == 'd')
9939           {
9940             switch (name[3])
9941             {
9942               case 'h':
9943                 if (name[4] == 'o' &&
9944                     name[5] == 's' &&
9945                     name[6] == 't' &&
9946                     name[7] == 'e' &&
9947                     name[8] == 'n' &&
9948                     name[9] == 't')
9949                 {                                 /* endhostent */
9950                   return -KEY_endhostent;
9951                 }
9952
9953                 goto unknown;
9954
9955               case 's':
9956                 if (name[4] == 'e' &&
9957                     name[5] == 'r' &&
9958                     name[6] == 'v' &&
9959                     name[7] == 'e' &&
9960                     name[8] == 'n' &&
9961                     name[9] == 't')
9962                 {                                 /* endservent */
9963                   return -KEY_endservent;
9964                 }
9965
9966                 goto unknown;
9967
9968               default:
9969                 goto unknown;
9970             }
9971           }
9972
9973           goto unknown;
9974
9975         case 'g':
9976           if (name[1] == 'e' &&
9977               name[2] == 't')
9978           {
9979             switch (name[3])
9980             {
9981               case 'h':
9982                 if (name[4] == 'o' &&
9983                     name[5] == 's' &&
9984                     name[6] == 't' &&
9985                     name[7] == 'e' &&
9986                     name[8] == 'n' &&
9987                     name[9] == 't')
9988                 {                                 /* gethostent */
9989                   return -KEY_gethostent;
9990                 }
9991
9992                 goto unknown;
9993
9994               case 's':
9995                 switch (name[4])
9996                 {
9997                   case 'e':
9998                     if (name[5] == 'r' &&
9999                         name[6] == 'v' &&
10000                         name[7] == 'e' &&
10001                         name[8] == 'n' &&
10002                         name[9] == 't')
10003                     {                             /* getservent */
10004                       return -KEY_getservent;
10005                     }
10006
10007                     goto unknown;
10008
10009                   case 'o':
10010                     if (name[5] == 'c' &&
10011                         name[6] == 'k' &&
10012                         name[7] == 'o' &&
10013                         name[8] == 'p' &&
10014                         name[9] == 't')
10015                     {                             /* getsockopt */
10016                       return -KEY_getsockopt;
10017                     }
10018
10019                     goto unknown;
10020
10021                   default:
10022                     goto unknown;
10023                 }
10024
10025               default:
10026                 goto unknown;
10027             }
10028           }
10029
10030           goto unknown;
10031
10032         case 's':
10033           switch (name[1])
10034           {
10035             case 'e':
10036               if (name[2] == 't')
10037               {
10038                 switch (name[3])
10039                 {
10040                   case 'h':
10041                     if (name[4] == 'o' &&
10042                         name[5] == 's' &&
10043                         name[6] == 't' &&
10044                         name[7] == 'e' &&
10045                         name[8] == 'n' &&
10046                         name[9] == 't')
10047                     {                             /* sethostent */
10048                       return -KEY_sethostent;
10049                     }
10050
10051                     goto unknown;
10052
10053                   case 's':
10054                     switch (name[4])
10055                     {
10056                       case 'e':
10057                         if (name[5] == 'r' &&
10058                             name[6] == 'v' &&
10059                             name[7] == 'e' &&
10060                             name[8] == 'n' &&
10061                             name[9] == 't')
10062                         {                         /* setservent */
10063                           return -KEY_setservent;
10064                         }
10065
10066                         goto unknown;
10067
10068                       case 'o':
10069                         if (name[5] == 'c' &&
10070                             name[6] == 'k' &&
10071                             name[7] == 'o' &&
10072                             name[8] == 'p' &&
10073                             name[9] == 't')
10074                         {                         /* setsockopt */
10075                           return -KEY_setsockopt;
10076                         }
10077
10078                         goto unknown;
10079
10080                       default:
10081                         goto unknown;
10082                     }
10083
10084                   default:
10085                     goto unknown;
10086                 }
10087               }
10088
10089               goto unknown;
10090
10091             case 'o':
10092               if (name[2] == 'c' &&
10093                   name[3] == 'k' &&
10094                   name[4] == 'e' &&
10095                   name[5] == 't' &&
10096                   name[6] == 'p' &&
10097                   name[7] == 'a' &&
10098                   name[8] == 'i' &&
10099                   name[9] == 'r')
10100               {                                   /* socketpair */
10101                 return -KEY_socketpair;
10102               }
10103
10104               goto unknown;
10105
10106             default:
10107               goto unknown;
10108           }
10109
10110         default:
10111           goto unknown;
10112       }
10113
10114     case 11: /* 8 tokens of length 11 */
10115       switch (name[0])
10116       {
10117         case '_':
10118           if (name[1] == '_' &&
10119               name[2] == 'P' &&
10120               name[3] == 'A' &&
10121               name[4] == 'C' &&
10122               name[5] == 'K' &&
10123               name[6] == 'A' &&
10124               name[7] == 'G' &&
10125               name[8] == 'E' &&
10126               name[9] == '_' &&
10127               name[10] == '_')
10128           {                                       /* __PACKAGE__ */
10129             return -KEY___PACKAGE__;
10130           }
10131
10132           goto unknown;
10133
10134         case 'e':
10135           if (name[1] == 'n' &&
10136               name[2] == 'd' &&
10137               name[3] == 'p' &&
10138               name[4] == 'r' &&
10139               name[5] == 'o' &&
10140               name[6] == 't' &&
10141               name[7] == 'o' &&
10142               name[8] == 'e' &&
10143               name[9] == 'n' &&
10144               name[10] == 't')
10145           {                                       /* endprotoent */
10146             return -KEY_endprotoent;
10147           }
10148
10149           goto unknown;
10150
10151         case 'g':
10152           if (name[1] == 'e' &&
10153               name[2] == 't')
10154           {
10155             switch (name[3])
10156             {
10157               case 'p':
10158                 switch (name[4])
10159                 {
10160                   case 'e':
10161                     if (name[5] == 'e' &&
10162                         name[6] == 'r' &&
10163                         name[7] == 'n' &&
10164                         name[8] == 'a' &&
10165                         name[9] == 'm' &&
10166                         name[10] == 'e')
10167                     {                             /* getpeername */
10168                       return -KEY_getpeername;
10169                     }
10170
10171                     goto unknown;
10172
10173                   case 'r':
10174                     switch (name[5])
10175                     {
10176                       case 'i':
10177                         if (name[6] == 'o' &&
10178                             name[7] == 'r' &&
10179                             name[8] == 'i' &&
10180                             name[9] == 't' &&
10181                             name[10] == 'y')
10182                         {                         /* getpriority */
10183                           return -KEY_getpriority;
10184                         }
10185
10186                         goto unknown;
10187
10188                       case 'o':
10189                         if (name[6] == 't' &&
10190                             name[7] == 'o' &&
10191                             name[8] == 'e' &&
10192                             name[9] == 'n' &&
10193                             name[10] == 't')
10194                         {                         /* getprotoent */
10195                           return -KEY_getprotoent;
10196                         }
10197
10198                         goto unknown;
10199
10200                       default:
10201                         goto unknown;
10202                     }
10203
10204                   default:
10205                     goto unknown;
10206                 }
10207
10208               case 's':
10209                 if (name[4] == 'o' &&
10210                     name[5] == 'c' &&
10211                     name[6] == 'k' &&
10212                     name[7] == 'n' &&
10213                     name[8] == 'a' &&
10214                     name[9] == 'm' &&
10215                     name[10] == 'e')
10216                 {                                 /* getsockname */
10217                   return -KEY_getsockname;
10218                 }
10219
10220                 goto unknown;
10221
10222               default:
10223                 goto unknown;
10224             }
10225           }
10226
10227           goto unknown;
10228
10229         case 's':
10230           if (name[1] == 'e' &&
10231               name[2] == 't' &&
10232               name[3] == 'p' &&
10233               name[4] == 'r')
10234           {
10235             switch (name[5])
10236             {
10237               case 'i':
10238                 if (name[6] == 'o' &&
10239                     name[7] == 'r' &&
10240                     name[8] == 'i' &&
10241                     name[9] == 't' &&
10242                     name[10] == 'y')
10243                 {                                 /* setpriority */
10244                   return -KEY_setpriority;
10245                 }
10246
10247                 goto unknown;
10248
10249               case 'o':
10250                 if (name[6] == 't' &&
10251                     name[7] == 'o' &&
10252                     name[8] == 'e' &&
10253                     name[9] == 'n' &&
10254                     name[10] == 't')
10255                 {                                 /* setprotoent */
10256                   return -KEY_setprotoent;
10257                 }
10258
10259                 goto unknown;
10260
10261               default:
10262                 goto unknown;
10263             }
10264           }
10265
10266           goto unknown;
10267
10268         default:
10269           goto unknown;
10270       }
10271
10272     case 12: /* 2 tokens of length 12 */
10273       if (name[0] == 'g' &&
10274           name[1] == 'e' &&
10275           name[2] == 't' &&
10276           name[3] == 'n' &&
10277           name[4] == 'e' &&
10278           name[5] == 't' &&
10279           name[6] == 'b' &&
10280           name[7] == 'y')
10281       {
10282         switch (name[8])
10283         {
10284           case 'a':
10285             if (name[9] == 'd' &&
10286                 name[10] == 'd' &&
10287                 name[11] == 'r')
10288             {                                     /* getnetbyaddr */
10289               return -KEY_getnetbyaddr;
10290             }
10291
10292             goto unknown;
10293
10294           case 'n':
10295             if (name[9] == 'a' &&
10296                 name[10] == 'm' &&
10297                 name[11] == 'e')
10298             {                                     /* getnetbyname */
10299               return -KEY_getnetbyname;
10300             }
10301
10302             goto unknown;
10303
10304           default:
10305             goto unknown;
10306         }
10307       }
10308
10309       goto unknown;
10310
10311     case 13: /* 4 tokens of length 13 */
10312       if (name[0] == 'g' &&
10313           name[1] == 'e' &&
10314           name[2] == 't')
10315       {
10316         switch (name[3])
10317         {
10318           case 'h':
10319             if (name[4] == 'o' &&
10320                 name[5] == 's' &&
10321                 name[6] == 't' &&
10322                 name[7] == 'b' &&
10323                 name[8] == 'y')
10324             {
10325               switch (name[9])
10326               {
10327                 case 'a':
10328                   if (name[10] == 'd' &&
10329                       name[11] == 'd' &&
10330                       name[12] == 'r')
10331                   {                               /* gethostbyaddr */
10332                     return -KEY_gethostbyaddr;
10333                   }
10334
10335                   goto unknown;
10336
10337                 case 'n':
10338                   if (name[10] == 'a' &&
10339                       name[11] == 'm' &&
10340                       name[12] == 'e')
10341                   {                               /* gethostbyname */
10342                     return -KEY_gethostbyname;
10343                   }
10344
10345                   goto unknown;
10346
10347                 default:
10348                   goto unknown;
10349               }
10350             }
10351
10352             goto unknown;
10353
10354           case 's':
10355             if (name[4] == 'e' &&
10356                 name[5] == 'r' &&
10357                 name[6] == 'v' &&
10358                 name[7] == 'b' &&
10359                 name[8] == 'y')
10360             {
10361               switch (name[9])
10362               {
10363                 case 'n':
10364                   if (name[10] == 'a' &&
10365                       name[11] == 'm' &&
10366                       name[12] == 'e')
10367                   {                               /* getservbyname */
10368                     return -KEY_getservbyname;
10369                   }
10370
10371                   goto unknown;
10372
10373                 case 'p':
10374                   if (name[10] == 'o' &&
10375                       name[11] == 'r' &&
10376                       name[12] == 't')
10377                   {                               /* getservbyport */
10378                     return -KEY_getservbyport;
10379                   }
10380
10381                   goto unknown;
10382
10383                 default:
10384                   goto unknown;
10385               }
10386             }
10387
10388             goto unknown;
10389
10390           default:
10391             goto unknown;
10392         }
10393       }
10394
10395       goto unknown;
10396
10397     case 14: /* 1 tokens of length 14 */
10398       if (name[0] == 'g' &&
10399           name[1] == 'e' &&
10400           name[2] == 't' &&
10401           name[3] == 'p' &&
10402           name[4] == 'r' &&
10403           name[5] == 'o' &&
10404           name[6] == 't' &&
10405           name[7] == 'o' &&
10406           name[8] == 'b' &&
10407           name[9] == 'y' &&
10408           name[10] == 'n' &&
10409           name[11] == 'a' &&
10410           name[12] == 'm' &&
10411           name[13] == 'e')
10412       {                                           /* getprotobyname */
10413         return -KEY_getprotobyname;
10414       }
10415
10416       goto unknown;
10417
10418     case 16: /* 1 tokens of length 16 */
10419       if (name[0] == 'g' &&
10420           name[1] == 'e' &&
10421           name[2] == 't' &&
10422           name[3] == 'p' &&
10423           name[4] == 'r' &&
10424           name[5] == 'o' &&
10425           name[6] == 't' &&
10426           name[7] == 'o' &&
10427           name[8] == 'b' &&
10428           name[9] == 'y' &&
10429           name[10] == 'n' &&
10430           name[11] == 'u' &&
10431           name[12] == 'm' &&
10432           name[13] == 'b' &&
10433           name[14] == 'e' &&
10434           name[15] == 'r')
10435       {                                           /* getprotobynumber */
10436         return -KEY_getprotobynumber;
10437       }
10438
10439       goto unknown;
10440
10441     default:
10442       goto unknown;
10443   }
10444
10445 unknown:
10446   return 0;
10447 }
10448
10449 STATIC void
10450 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
10451 {
10452     dVAR;
10453
10454     if (*s == ' ' && s[1] == '(') {     /* XXX gotta be a better way */
10455         if (ckWARN(WARN_SYNTAX)) {
10456             int level = 1;
10457             const char *w;
10458             for (w = s+2; *w && level; w++) {
10459                 if (*w == '(')
10460                     ++level;
10461                 else if (*w == ')')
10462                     --level;
10463             }
10464             while (isSPACE(*w))
10465                 ++w;
10466             /* the list of chars below is for end of statements or
10467              * block / parens, boolean operators (&&, ||, //) and branch
10468              * constructs (or, and, if, until, unless, while, err, for).
10469              * Not a very solid hack... */
10470             if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
10471                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10472                             "%s (...) interpreted as function",name);
10473         }
10474     }
10475     while (s < PL_bufend && isSPACE(*s))
10476         s++;
10477     if (*s == '(')
10478         s++;
10479     while (s < PL_bufend && isSPACE(*s))
10480         s++;
10481     if (isIDFIRST_lazy_if(s,UTF)) {
10482         const char * const w = s++;
10483         while (isALNUM_lazy_if(s,UTF))
10484             s++;
10485         while (s < PL_bufend && isSPACE(*s))
10486             s++;
10487         if (*s == ',') {
10488             GV* gv;
10489             if (keyword(w, s - w, 0))
10490                 return;
10491
10492             gv = gv_fetchpvn_flags(w, s - w, 0, SVt_PVCV);
10493             if (gv && GvCVu(gv))
10494                 return;
10495             Perl_croak(aTHX_ "No comma allowed after %s", what);
10496         }
10497     }
10498 }
10499
10500 /* Either returns sv, or mortalizes sv and returns a new SV*.
10501    Best used as sv=new_constant(..., sv, ...).
10502    If s, pv are NULL, calls subroutine with one argument,
10503    and type is used with error messages only. */
10504
10505 STATIC SV *
10506 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
10507                SV *sv, SV *pv, const char *type, STRLEN typelen)
10508 {
10509     dVAR; dSP;
10510     HV * const table = GvHV(PL_hintgv);          /* ^H */
10511     SV *res;
10512     SV **cvp;
10513     SV *cv, *typesv;
10514     const char *why1 = "", *why2 = "", *why3 = "";
10515
10516     if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
10517         SV *msg;
10518
10519         why2 = (const char *)
10520             (strEQ(key,"charnames")
10521              ? "(possibly a missing \"use charnames ...\")"
10522              : "");
10523         msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
10524                             (type ? type: "undef"), why2);
10525
10526         /* This is convoluted and evil ("goto considered harmful")
10527          * but I do not understand the intricacies of all the different
10528          * failure modes of %^H in here.  The goal here is to make
10529          * the most probable error message user-friendly. --jhi */
10530
10531         goto msgdone;
10532
10533     report:
10534         msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
10535                             (type ? type: "undef"), why1, why2, why3);
10536     msgdone:
10537         yyerror(SvPVX_const(msg));
10538         SvREFCNT_dec(msg);
10539         return sv;
10540     }
10541     cvp = hv_fetch(table, key, keylen, FALSE);
10542     if (!cvp || !SvOK(*cvp)) {
10543         why1 = "$^H{";
10544         why2 = key;
10545         why3 = "} is not defined";
10546         goto report;
10547     }
10548     sv_2mortal(sv);                     /* Parent created it permanently */
10549     cv = *cvp;
10550     if (!pv && s)
10551         pv = sv_2mortal(newSVpvn(s, len));
10552     if (type && pv)
10553         typesv = sv_2mortal(newSVpvn(type, typelen));
10554     else
10555         typesv = &PL_sv_undef;
10556
10557     PUSHSTACKi(PERLSI_OVERLOAD);
10558     ENTER ;
10559     SAVETMPS;
10560
10561     PUSHMARK(SP) ;
10562     EXTEND(sp, 3);
10563     if (pv)
10564         PUSHs(pv);
10565     PUSHs(sv);
10566     if (pv)
10567         PUSHs(typesv);
10568     PUTBACK;
10569     call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
10570
10571     SPAGAIN ;
10572
10573     /* Check the eval first */
10574     if (!PL_in_eval && SvTRUE(ERRSV)) {
10575         sv_catpvs(ERRSV, "Propagated");
10576         yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
10577         (void)POPs;
10578         res = SvREFCNT_inc_simple(sv);
10579     }
10580     else {
10581         res = POPs;
10582         SvREFCNT_inc_simple_void(res);
10583     }
10584
10585     PUTBACK ;
10586     FREETMPS ;
10587     LEAVE ;
10588     POPSTACK;
10589
10590     if (!SvOK(res)) {
10591         why1 = "Call to &{$^H{";
10592         why2 = key;
10593         why3 = "}} did not return a defined value";
10594         sv = res;
10595         goto report;
10596     }
10597
10598     return res;
10599 }
10600
10601 /* Returns a NUL terminated string, with the length of the string written to
10602    *slp
10603    */
10604 STATIC char *
10605 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
10606 {
10607     dVAR;
10608     register char *d = dest;
10609     register char * const e = d + destlen - 3;  /* two-character token, ending NUL */
10610     for (;;) {
10611         if (d >= e)
10612             Perl_croak(aTHX_ ident_too_long);
10613         if (isALNUM(*s))        /* UTF handled below */
10614             *d++ = *s++;
10615         else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
10616             *d++ = ':';
10617             *d++ = ':';
10618             s++;
10619         }
10620         else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
10621             *d++ = *s++;
10622             *d++ = *s++;
10623         }
10624         else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
10625             char *t = s + UTF8SKIP(s);
10626             size_t len;
10627             while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
10628                 t += UTF8SKIP(t);
10629             len = t - s;
10630             if (d + len > e)
10631                 Perl_croak(aTHX_ ident_too_long);
10632             Copy(s, d, len, char);
10633             d += len;
10634             s = t;
10635         }
10636         else {
10637             *d = '\0';
10638             *slp = d - dest;
10639             return s;
10640         }
10641     }
10642 }
10643
10644 STATIC char *
10645 S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
10646 {
10647     dVAR;
10648     char *bracket = NULL;
10649     char funny = *s++;
10650     register char *d = dest;
10651     register char * const e = d + destlen + 3;    /* two-character token, ending NUL */
10652
10653     if (isSPACE(*s))
10654         s = PEEKSPACE(s);
10655     if (isDIGIT(*s)) {
10656         while (isDIGIT(*s)) {
10657             if (d >= e)
10658                 Perl_croak(aTHX_ ident_too_long);
10659             *d++ = *s++;
10660         }
10661     }
10662     else {
10663         for (;;) {
10664             if (d >= e)
10665                 Perl_croak(aTHX_ ident_too_long);
10666             if (isALNUM(*s))    /* UTF handled below */
10667                 *d++ = *s++;
10668             else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
10669                 *d++ = ':';
10670                 *d++ = ':';
10671                 s++;
10672             }
10673             else if (*s == ':' && s[1] == ':') {
10674                 *d++ = *s++;
10675                 *d++ = *s++;
10676             }
10677             else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
10678                 char *t = s + UTF8SKIP(s);
10679                 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
10680                     t += UTF8SKIP(t);
10681                 if (d + (t - s) > e)
10682                     Perl_croak(aTHX_ ident_too_long);
10683                 Copy(s, d, t - s, char);
10684                 d += t - s;
10685                 s = t;
10686             }
10687             else
10688                 break;
10689         }
10690     }
10691     *d = '\0';
10692     d = dest;
10693     if (*d) {
10694         if (PL_lex_state != LEX_NORMAL)
10695             PL_lex_state = LEX_INTERPENDMAYBE;
10696         return s;
10697     }
10698     if (*s == '$' && s[1] &&
10699         (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
10700     {
10701         return s;
10702     }
10703     if (*s == '{') {
10704         bracket = s;
10705         s++;
10706     }
10707     else if (ck_uni)
10708         check_uni();
10709     if (s < send)
10710         *d = *s++;
10711     d[1] = '\0';
10712     if (*d == '^' && *s && isCONTROLVAR(*s)) {
10713         *d = toCTRL(*s);
10714         s++;
10715     }
10716     if (bracket) {
10717         if (isSPACE(s[-1])) {
10718             while (s < send) {
10719                 const char ch = *s++;
10720                 if (!SPACE_OR_TAB(ch)) {
10721                     *d = ch;
10722                     break;
10723                 }
10724             }
10725         }
10726         if (isIDFIRST_lazy_if(d,UTF)) {
10727             d++;
10728             if (UTF) {
10729                 char *end = s;
10730                 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
10731                     end += UTF8SKIP(end);
10732                     while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
10733                         end += UTF8SKIP(end);
10734                 }
10735                 Copy(s, d, end - s, char);
10736                 d += end - s;
10737                 s = end;
10738             }
10739             else {
10740                 while ((isALNUM(*s) || *s == ':') && d < e)
10741                     *d++ = *s++;
10742                 if (d >= e)
10743                     Perl_croak(aTHX_ ident_too_long);
10744             }
10745             *d = '\0';
10746             while (s < send && SPACE_OR_TAB(*s))
10747                 s++;
10748             if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
10749                 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
10750                     const char * const brack =
10751                         (const char *)
10752                         ((*s == '[') ? "[...]" : "{...}");
10753                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10754                         "Ambiguous use of %c{%s%s} resolved to %c%s%s",
10755                         funny, dest, brack, funny, dest, brack);
10756                 }
10757                 bracket++;
10758                 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
10759                 return s;
10760             }
10761         }
10762         /* Handle extended ${^Foo} variables
10763          * 1999-02-27 mjd-perl-patch@plover.com */
10764         else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
10765                  && isALNUM(*s))
10766         {
10767             d++;
10768             while (isALNUM(*s) && d < e) {
10769                 *d++ = *s++;
10770             }
10771             if (d >= e)
10772                 Perl_croak(aTHX_ ident_too_long);
10773             *d = '\0';
10774         }
10775         if (*s == '}') {
10776             s++;
10777             if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
10778                 PL_lex_state = LEX_INTERPEND;
10779                 PL_expect = XREF;
10780             }
10781             if (PL_lex_state == LEX_NORMAL) {
10782                 if (ckWARN(WARN_AMBIGUOUS) &&
10783                     (keyword(dest, d - dest, 0)
10784                      || get_cvn_flags(dest, d - dest, 0)))
10785                 {
10786                     if (funny == '#')
10787                         funny = '@';
10788                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10789                         "Ambiguous use of %c{%s} resolved to %c%s",
10790                         funny, dest, funny, dest);
10791                 }
10792             }
10793         }
10794         else {
10795             s = bracket;                /* let the parser handle it */
10796             *dest = '\0';
10797         }
10798     }
10799     else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
10800         PL_lex_state = LEX_INTERPEND;
10801     return s;
10802 }
10803
10804 void
10805 Perl_pmflag(pTHX_ U32* pmfl, int ch)
10806 {
10807     PERL_UNUSED_CONTEXT;
10808     if (ch<256) {
10809         char c = (char)ch;
10810         switch (c) {
10811             CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl);
10812             case GLOBAL_PAT_MOD:    *pmfl |= PMf_GLOBAL; break;
10813             case CONTINUE_PAT_MOD:  *pmfl |= PMf_CONTINUE; break;
10814             case ONCE_PAT_MOD:      *pmfl |= PMf_KEEP; break;
10815             case KEEPCOPY_PAT_MOD:  *pmfl |= PMf_KEEPCOPY; break;
10816         }
10817     }
10818 }
10819
10820 STATIC char *
10821 S_scan_pat(pTHX_ char *start, I32 type)
10822 {
10823     dVAR;
10824     PMOP *pm;
10825     char *s = scan_str(start,!!PL_madskills,FALSE);
10826     const char * const valid_flags =
10827         (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
10828 #ifdef PERL_MAD
10829     char *modstart;
10830 #endif
10831
10832
10833     if (!s) {
10834         const char * const delimiter = skipspace(start);
10835         Perl_croak(aTHX_
10836                    (const char *)
10837                    (*delimiter == '?'
10838                     ? "Search pattern not terminated or ternary operator parsed as search pattern"
10839                     : "Search pattern not terminated" ));
10840     }
10841
10842     pm = (PMOP*)newPMOP(type, 0);
10843     if (PL_multi_open == '?') {
10844         /* This is the only point in the code that sets PMf_ONCE:  */
10845         pm->op_pmflags |= PMf_ONCE;
10846
10847         /* Hence it's safe to do this bit of PMOP book-keeping here, which
10848            allows us to restrict the list needed by reset to just the ??
10849            matches.  */
10850         assert(type != OP_TRANS);
10851         if (PL_curstash) {
10852             MAGIC *mg = mg_find((SV*)PL_curstash, PERL_MAGIC_symtab);
10853             U32 elements;
10854             if (!mg) {
10855                 mg = sv_magicext((SV*)PL_curstash, 0, PERL_MAGIC_symtab, 0, 0,
10856                                  0);
10857             }
10858             elements = mg->mg_len / sizeof(PMOP**);
10859             Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
10860             ((PMOP**)mg->mg_ptr) [elements++] = pm;
10861             mg->mg_len = elements * sizeof(PMOP**);
10862             PmopSTASH_set(pm,PL_curstash);
10863         }
10864     }
10865 #ifdef PERL_MAD
10866     modstart = s;
10867 #endif
10868     while (*s && strchr(valid_flags, *s))
10869         pmflag(&pm->op_pmflags,*s++);
10870 #ifdef PERL_MAD
10871     if (PL_madskills && modstart != s) {
10872         SV* tmptoken = newSVpvn(modstart, s - modstart);
10873         append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
10874     }
10875 #endif
10876     /* issue a warning if /c is specified,but /g is not */
10877     if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL)
10878             && ckWARN(WARN_REGEXP))
10879     {
10880         Perl_warner(aTHX_ packWARN(WARN_REGEXP),
10881             "Use of /c modifier is meaningless without /g" );
10882     }
10883
10884     PL_lex_op = (OP*)pm;
10885     yylval.ival = OP_MATCH;
10886     return s;
10887 }
10888
10889 STATIC char *
10890 S_scan_subst(pTHX_ char *start)
10891 {
10892     dVAR;
10893     register char *s;
10894     register PMOP *pm;
10895     I32 first_start;
10896     I32 es = 0;
10897 #ifdef PERL_MAD
10898     char *modstart;
10899 #endif
10900
10901     yylval.ival = OP_NULL;
10902
10903     s = scan_str(start,!!PL_madskills,FALSE);
10904
10905     if (!s)
10906         Perl_croak(aTHX_ "Substitution pattern not terminated");
10907
10908     if (s[-1] == PL_multi_open)
10909         s--;
10910 #ifdef PERL_MAD
10911     if (PL_madskills) {
10912         CURMAD('q', PL_thisopen);
10913         CURMAD('_', PL_thiswhite);
10914         CURMAD('E', PL_thisstuff);
10915         CURMAD('Q', PL_thisclose);
10916         PL_realtokenstart = s - SvPVX(PL_linestr);
10917     }
10918 #endif
10919
10920     first_start = PL_multi_start;
10921     s = scan_str(s,!!PL_madskills,FALSE);
10922     if (!s) {
10923         if (PL_lex_stuff) {
10924             SvREFCNT_dec(PL_lex_stuff);
10925             PL_lex_stuff = NULL;
10926         }
10927         Perl_croak(aTHX_ "Substitution replacement not terminated");
10928     }
10929     PL_multi_start = first_start;       /* so whole substitution is taken together */
10930
10931     pm = (PMOP*)newPMOP(OP_SUBST, 0);
10932
10933 #ifdef PERL_MAD
10934     if (PL_madskills) {
10935         CURMAD('z', PL_thisopen);
10936         CURMAD('R', PL_thisstuff);
10937         CURMAD('Z', PL_thisclose);
10938     }
10939     modstart = s;
10940 #endif
10941
10942     while (*s) {
10943         if (*s == EXEC_PAT_MOD) {
10944             s++;
10945             es++;
10946         }
10947         else if (strchr(S_PAT_MODS, *s))
10948             pmflag(&pm->op_pmflags,*s++);
10949         else
10950             break;
10951     }
10952
10953 #ifdef PERL_MAD
10954     if (PL_madskills) {
10955         if (modstart != s)
10956             curmad('m', newSVpvn(modstart, s - modstart));
10957         append_madprops(PL_thismad, (OP*)pm, 0);
10958         PL_thismad = 0;
10959     }
10960 #endif
10961     if ((pm->op_pmflags & PMf_CONTINUE) && ckWARN(WARN_REGEXP)) {
10962         Perl_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
10963     }
10964
10965     if (es) {
10966         SV * const repl = newSVpvs("");
10967
10968         PL_sublex_info.super_bufptr = s;
10969         PL_sublex_info.super_bufend = PL_bufend;
10970         PL_multi_end = 0;
10971         pm->op_pmflags |= PMf_EVAL;
10972         while (es-- > 0) {
10973             if (es)
10974                 sv_catpvs(repl, "eval ");
10975             else
10976                 sv_catpvs(repl, "do ");
10977         }
10978         sv_catpvs(repl, "{");
10979         sv_catsv(repl, PL_lex_repl);
10980         if (strchr(SvPVX(PL_lex_repl), '#'))
10981             sv_catpvs(repl, "\n");
10982         sv_catpvs(repl, "}");
10983         SvEVALED_on(repl);
10984         SvREFCNT_dec(PL_lex_repl);
10985         PL_lex_repl = repl;
10986     }
10987
10988     PL_lex_op = (OP*)pm;
10989     yylval.ival = OP_SUBST;
10990     return s;
10991 }
10992
10993 STATIC char *
10994 S_scan_trans(pTHX_ char *start)
10995 {
10996     dVAR;
10997     register char* s;
10998     OP *o;
10999     short *tbl;
11000     I32 squash;
11001     I32 del;
11002     I32 complement;
11003 #ifdef PERL_MAD
11004     char *modstart;
11005 #endif
11006
11007     yylval.ival = OP_NULL;
11008
11009     s = scan_str(start,!!PL_madskills,FALSE);
11010     if (!s)
11011         Perl_croak(aTHX_ "Transliteration pattern not terminated");
11012
11013     if (s[-1] == PL_multi_open)
11014         s--;
11015 #ifdef PERL_MAD
11016     if (PL_madskills) {
11017         CURMAD('q', PL_thisopen);
11018         CURMAD('_', PL_thiswhite);
11019         CURMAD('E', PL_thisstuff);
11020         CURMAD('Q', PL_thisclose);
11021         PL_realtokenstart = s - SvPVX(PL_linestr);
11022     }
11023 #endif
11024
11025     s = scan_str(s,!!PL_madskills,FALSE);
11026     if (!s) {
11027         if (PL_lex_stuff) {
11028             SvREFCNT_dec(PL_lex_stuff);
11029             PL_lex_stuff = NULL;
11030         }
11031         Perl_croak(aTHX_ "Transliteration replacement not terminated");
11032     }
11033     if (PL_madskills) {
11034         CURMAD('z', PL_thisopen);
11035         CURMAD('R', PL_thisstuff);
11036         CURMAD('Z', PL_thisclose);
11037     }
11038
11039     complement = del = squash = 0;
11040 #ifdef PERL_MAD
11041     modstart = s;
11042 #endif
11043     while (1) {
11044         switch (*s) {
11045         case 'c':
11046             complement = OPpTRANS_COMPLEMENT;
11047             break;
11048         case 'd':
11049             del = OPpTRANS_DELETE;
11050             break;
11051         case 's':
11052             squash = OPpTRANS_SQUASH;
11053             break;
11054         default:
11055             goto no_more;
11056         }
11057         s++;
11058     }
11059   no_more:
11060
11061     tbl = (short *)PerlMemShared_calloc(complement&&!del?258:256, sizeof(short));
11062     o = newPVOP(OP_TRANS, 0, (char*)tbl);
11063     o->op_private &= ~OPpTRANS_ALL;
11064     o->op_private |= del|squash|complement|
11065       (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
11066       (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF   : 0);
11067
11068     PL_lex_op = o;
11069     yylval.ival = OP_TRANS;
11070
11071 #ifdef PERL_MAD
11072     if (PL_madskills) {
11073         if (modstart != s)
11074             curmad('m', newSVpvn(modstart, s - modstart));
11075         append_madprops(PL_thismad, o, 0);
11076         PL_thismad = 0;
11077     }
11078 #endif
11079
11080     return s;
11081 }
11082
11083 STATIC char *
11084 S_scan_heredoc(pTHX_ register char *s)
11085 {
11086     dVAR;
11087     SV *herewas;
11088     I32 op_type = OP_SCALAR;
11089     I32 len;
11090     SV *tmpstr;
11091     char term;
11092     const char *found_newline;
11093     register char *d;
11094     register char *e;
11095     char *peek;
11096     const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
11097 #ifdef PERL_MAD
11098     I32 stuffstart = s - SvPVX(PL_linestr);
11099     char *tstart;
11100
11101     PL_realtokenstart = -1;
11102 #endif
11103
11104     s += 2;
11105     d = PL_tokenbuf;
11106     e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
11107     if (!outer)
11108         *d++ = '\n';
11109     peek = s;
11110     while (SPACE_OR_TAB(*peek))
11111         peek++;
11112     if (*peek == '`' || *peek == '\'' || *peek =='"') {
11113         s = peek;
11114         term = *s++;
11115         s = delimcpy(d, e, s, PL_bufend, term, &len);
11116         d += len;
11117         if (s < PL_bufend)
11118             s++;
11119     }
11120     else {
11121         if (*s == '\\')
11122             s++, term = '\'';
11123         else
11124             term = '"';
11125         if (!isALNUM_lazy_if(s,UTF))
11126             deprecate_old("bare << to mean <<\"\"");
11127         for (; isALNUM_lazy_if(s,UTF); s++) {
11128             if (d < e)
11129                 *d++ = *s;
11130         }
11131     }
11132     if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
11133         Perl_croak(aTHX_ "Delimiter for here document is too long");
11134     *d++ = '\n';
11135     *d = '\0';
11136     len = d - PL_tokenbuf;
11137
11138 #ifdef PERL_MAD
11139     if (PL_madskills) {
11140         tstart = PL_tokenbuf + !outer;
11141         PL_thisclose = newSVpvn(tstart, len - !outer);
11142         tstart = SvPVX(PL_linestr) + stuffstart;
11143         PL_thisopen = newSVpvn(tstart, s - tstart);
11144         stuffstart = s - SvPVX(PL_linestr);
11145     }
11146 #endif
11147 #ifndef PERL_STRICT_CR
11148     d = strchr(s, '\r');
11149     if (d) {
11150         char * const olds = s;
11151         s = d;
11152         while (s < PL_bufend) {
11153             if (*s == '\r') {
11154                 *d++ = '\n';
11155                 if (*++s == '\n')
11156                     s++;
11157             }
11158             else if (*s == '\n' && s[1] == '\r') {      /* \015\013 on a mac? */
11159                 *d++ = *s++;
11160                 s++;
11161             }
11162             else
11163                 *d++ = *s++;
11164         }
11165         *d = '\0';
11166         PL_bufend = d;
11167         SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
11168         s = olds;
11169     }
11170 #endif
11171 #ifdef PERL_MAD
11172     found_newline = 0;
11173 #endif
11174     if ( outer || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s)) ) {
11175         herewas = newSVpvn(s,PL_bufend-s);
11176     }
11177     else {
11178 #ifdef PERL_MAD
11179         herewas = newSVpvn(s-1,found_newline-s+1);
11180 #else
11181         s--;
11182         herewas = newSVpvn(s,found_newline-s);
11183 #endif
11184     }
11185 #ifdef PERL_MAD
11186     if (PL_madskills) {
11187         tstart = SvPVX(PL_linestr) + stuffstart;
11188         if (PL_thisstuff)
11189             sv_catpvn(PL_thisstuff, tstart, s - tstart);
11190         else
11191             PL_thisstuff = newSVpvn(tstart, s - tstart);
11192     }
11193 #endif
11194     s += SvCUR(herewas);
11195
11196 #ifdef PERL_MAD
11197     stuffstart = s - SvPVX(PL_linestr);
11198
11199     if (found_newline)
11200         s--;
11201 #endif
11202
11203     tmpstr = newSV_type(SVt_PVIV);
11204     SvGROW(tmpstr, 80);
11205     if (term == '\'') {
11206         op_type = OP_CONST;
11207         SvIV_set(tmpstr, -1);
11208     }
11209     else if (term == '`') {
11210         op_type = OP_BACKTICK;
11211         SvIV_set(tmpstr, '\\');
11212     }
11213
11214     CLINE;
11215     PL_multi_start = CopLINE(PL_curcop);
11216     PL_multi_open = PL_multi_close = '<';
11217     term = *PL_tokenbuf;
11218     if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
11219         char * const bufptr = PL_sublex_info.super_bufptr;
11220         char * const bufend = PL_sublex_info.super_bufend;
11221         char * const olds = s - SvCUR(herewas);
11222         s = strchr(bufptr, '\n');
11223         if (!s)
11224             s = bufend;
11225         d = s;
11226         while (s < bufend &&
11227           (*s != term || memNE(s,PL_tokenbuf,len)) ) {
11228             if (*s++ == '\n')
11229                 CopLINE_inc(PL_curcop);
11230         }
11231         if (s >= bufend) {
11232             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11233             missingterm(PL_tokenbuf);
11234         }
11235         sv_setpvn(herewas,bufptr,d-bufptr+1);
11236         sv_setpvn(tmpstr,d+1,s-d);
11237         s += len - 1;
11238         sv_catpvn(herewas,s,bufend-s);
11239         Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
11240
11241         s = olds;
11242         goto retval;
11243     }
11244     else if (!outer) {
11245         d = s;
11246         while (s < PL_bufend &&
11247           (*s != term || memNE(s,PL_tokenbuf,len)) ) {
11248             if (*s++ == '\n')
11249                 CopLINE_inc(PL_curcop);
11250         }
11251         if (s >= PL_bufend) {
11252             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11253             missingterm(PL_tokenbuf);
11254         }
11255         sv_setpvn(tmpstr,d+1,s-d);
11256 #ifdef PERL_MAD
11257         if (PL_madskills) {
11258             if (PL_thisstuff)
11259                 sv_catpvn(PL_thisstuff, d + 1, s - d);
11260             else
11261                 PL_thisstuff = newSVpvn(d + 1, s - d);
11262             stuffstart = s - SvPVX(PL_linestr);
11263         }
11264 #endif
11265         s += len - 1;
11266         CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
11267
11268         sv_catpvn(herewas,s,PL_bufend-s);
11269         sv_setsv(PL_linestr,herewas);
11270         PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
11271         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11272         PL_last_lop = PL_last_uni = NULL;
11273     }
11274     else
11275         sv_setpvn(tmpstr,"",0);   /* avoid "uninitialized" warning */
11276     while (s >= PL_bufend) {    /* multiple line string? */
11277 #ifdef PERL_MAD
11278         if (PL_madskills) {
11279             tstart = SvPVX(PL_linestr) + stuffstart;
11280             if (PL_thisstuff)
11281                 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
11282             else
11283                 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
11284         }
11285 #endif
11286         if (!outer ||
11287          !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
11288             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11289             missingterm(PL_tokenbuf);
11290         }
11291 #ifdef PERL_MAD
11292         stuffstart = s - SvPVX(PL_linestr);
11293 #endif
11294         CopLINE_inc(PL_curcop);
11295         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11296         PL_last_lop = PL_last_uni = NULL;
11297 #ifndef PERL_STRICT_CR
11298         if (PL_bufend - PL_linestart >= 2) {
11299             if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
11300                 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
11301             {
11302                 PL_bufend[-2] = '\n';
11303                 PL_bufend--;
11304                 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
11305             }
11306             else if (PL_bufend[-1] == '\r')
11307                 PL_bufend[-1] = '\n';
11308         }
11309         else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
11310             PL_bufend[-1] = '\n';
11311 #endif
11312         if (PERLDB_LINE && PL_curstash != PL_debstash)
11313             update_debugger_info(PL_linestr, NULL, 0);
11314         if (*s == term && memEQ(s,PL_tokenbuf,len)) {
11315             STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
11316             *(SvPVX(PL_linestr) + off ) = ' ';
11317             sv_catsv(PL_linestr,herewas);
11318             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11319             s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
11320         }
11321         else {
11322             s = PL_bufend;
11323             sv_catsv(tmpstr,PL_linestr);
11324         }
11325     }
11326     s++;
11327 retval:
11328     PL_multi_end = CopLINE(PL_curcop);
11329     if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
11330         SvPV_shrink_to_cur(tmpstr);
11331     }
11332     SvREFCNT_dec(herewas);
11333     if (!IN_BYTES) {
11334         if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
11335             SvUTF8_on(tmpstr);
11336         else if (PL_encoding)
11337             sv_recode_to_utf8(tmpstr, PL_encoding);
11338     }
11339     PL_lex_stuff = tmpstr;
11340     yylval.ival = op_type;
11341     return s;
11342 }
11343
11344 /* scan_inputsymbol
11345    takes: current position in input buffer
11346    returns: new position in input buffer
11347    side-effects: yylval and lex_op are set.
11348
11349    This code handles:
11350
11351    <>           read from ARGV
11352    <FH>         read from filehandle
11353    <pkg::FH>    read from package qualified filehandle
11354    <pkg'FH>     read from package qualified filehandle
11355    <$fh>        read from filehandle in $fh
11356    <*.h>        filename glob
11357
11358 */
11359
11360 STATIC char *
11361 S_scan_inputsymbol(pTHX_ char *start)
11362 {
11363     dVAR;
11364     register char *s = start;           /* current position in buffer */
11365     char *end;
11366     I32 len;
11367
11368     char *d = PL_tokenbuf;                                      /* start of temp holding space */
11369     const char * const e = PL_tokenbuf + sizeof PL_tokenbuf;    /* end of temp holding space */
11370
11371     end = strchr(s, '\n');
11372     if (!end)
11373         end = PL_bufend;
11374     s = delimcpy(d, e, s + 1, end, '>', &len);  /* extract until > */
11375
11376     /* die if we didn't have space for the contents of the <>,
11377        or if it didn't end, or if we see a newline
11378     */
11379
11380     if (len >= (I32)sizeof PL_tokenbuf)
11381         Perl_croak(aTHX_ "Excessively long <> operator");
11382     if (s >= end)
11383         Perl_croak(aTHX_ "Unterminated <> operator");
11384
11385     s++;
11386
11387     /* check for <$fh>
11388        Remember, only scalar variables are interpreted as filehandles by
11389        this code.  Anything more complex (e.g., <$fh{$num}>) will be
11390        treated as a glob() call.
11391        This code makes use of the fact that except for the $ at the front,
11392        a scalar variable and a filehandle look the same.
11393     */
11394     if (*d == '$' && d[1]) d++;
11395
11396     /* allow <Pkg'VALUE> or <Pkg::VALUE> */
11397     while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
11398         d++;
11399
11400     /* If we've tried to read what we allow filehandles to look like, and
11401        there's still text left, then it must be a glob() and not a getline.
11402        Use scan_str to pull out the stuff between the <> and treat it
11403        as nothing more than a string.
11404     */
11405
11406     if (d - PL_tokenbuf != len) {
11407         yylval.ival = OP_GLOB;
11408         s = scan_str(start,!!PL_madskills,FALSE);
11409         if (!s)
11410            Perl_croak(aTHX_ "Glob not terminated");
11411         return s;
11412     }
11413     else {
11414         bool readline_overriden = FALSE;
11415         GV *gv_readline;
11416         GV **gvp;
11417         /* we're in a filehandle read situation */
11418         d = PL_tokenbuf;
11419
11420         /* turn <> into <ARGV> */
11421         if (!len)
11422             Copy("ARGV",d,5,char);
11423
11424         /* Check whether readline() is overriden */
11425         gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
11426         if ((gv_readline
11427                 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
11428                 ||
11429                 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
11430                  && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
11431                 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
11432             readline_overriden = TRUE;
11433
11434         /* if <$fh>, create the ops to turn the variable into a
11435            filehandle
11436         */
11437         if (*d == '$') {
11438             /* try to find it in the pad for this block, otherwise find
11439                add symbol table ops
11440             */
11441             const PADOFFSET tmp = pad_findmy(d);
11442             if (tmp != NOT_IN_PAD) {
11443                 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
11444                     HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
11445                     HEK * const stashname = HvNAME_HEK(stash);
11446                     SV * const sym = sv_2mortal(newSVhek(stashname));
11447                     sv_catpvs(sym, "::");
11448                     sv_catpv(sym, d+1);
11449                     d = SvPVX(sym);
11450                     goto intro_sym;
11451                 }
11452                 else {
11453                     OP * const o = newOP(OP_PADSV, 0);
11454                     o->op_targ = tmp;
11455                     PL_lex_op = readline_overriden
11456                         ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
11457                                 append_elem(OP_LIST, o,
11458                                     newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
11459                         : (OP*)newUNOP(OP_READLINE, 0, o);
11460                 }
11461             }
11462             else {
11463                 GV *gv;
11464                 ++d;
11465 intro_sym:
11466                 gv = gv_fetchpv(d,
11467                                 (PL_in_eval
11468                                  ? (GV_ADDMULTI | GV_ADDINEVAL)
11469                                  : GV_ADDMULTI),
11470                                 SVt_PV);
11471                 PL_lex_op = readline_overriden
11472                     ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
11473                             append_elem(OP_LIST,
11474                                 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
11475                                 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11476                     : (OP*)newUNOP(OP_READLINE, 0,
11477                             newUNOP(OP_RV2SV, 0,
11478                                 newGVOP(OP_GV, 0, gv)));
11479             }
11480             if (!readline_overriden)
11481                 PL_lex_op->op_flags |= OPf_SPECIAL;
11482             /* we created the ops in PL_lex_op, so make yylval.ival a null op */
11483             yylval.ival = OP_NULL;
11484         }
11485
11486         /* If it's none of the above, it must be a literal filehandle
11487            (<Foo::BAR> or <FOO>) so build a simple readline OP */
11488         else {
11489             GV * const gv = gv_fetchpv(d, GV_ADD, SVt_PVIO);
11490             PL_lex_op = readline_overriden
11491                 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
11492                         append_elem(OP_LIST,
11493                             newGVOP(OP_GV, 0, gv),
11494                             newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11495                 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
11496             yylval.ival = OP_NULL;
11497         }
11498     }
11499
11500     return s;
11501 }
11502
11503
11504 /* scan_str
11505    takes: start position in buffer
11506           keep_quoted preserve \ on the embedded delimiter(s)
11507           keep_delims preserve the delimiters around the string
11508    returns: position to continue reading from buffer
11509    side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
11510         updates the read buffer.
11511
11512    This subroutine pulls a string out of the input.  It is called for:
11513         q               single quotes           q(literal text)
11514         '               single quotes           'literal text'
11515         qq              double quotes           qq(interpolate $here please)
11516         "               double quotes           "interpolate $here please"
11517         qx              backticks               qx(/bin/ls -l)
11518         `               backticks               `/bin/ls -l`
11519         qw              quote words             @EXPORT_OK = qw( func() $spam )
11520         m//             regexp match            m/this/
11521         s///            regexp substitute       s/this/that/
11522         tr///           string transliterate    tr/this/that/
11523         y///            string transliterate    y/this/that/
11524         ($*@)           sub prototypes          sub foo ($)
11525         (stuff)         sub attr parameters     sub foo : attr(stuff)
11526         <>              readline or globs       <FOO>, <>, <$fh>, or <*.c>
11527
11528    In most of these cases (all but <>, patterns and transliterate)
11529    yylex() calls scan_str().  m// makes yylex() call scan_pat() which
11530    calls scan_str().  s/// makes yylex() call scan_subst() which calls
11531    scan_str().  tr/// and y/// make yylex() call scan_trans() which
11532    calls scan_str().
11533
11534    It skips whitespace before the string starts, and treats the first
11535    character as the delimiter.  If the delimiter is one of ([{< then
11536    the corresponding "close" character )]}> is used as the closing
11537    delimiter.  It allows quoting of delimiters, and if the string has
11538    balanced delimiters ([{<>}]) it allows nesting.
11539
11540    On success, the SV with the resulting string is put into lex_stuff or,
11541    if that is already non-NULL, into lex_repl. The second case occurs only
11542    when parsing the RHS of the special constructs s/// and tr/// (y///).
11543    For convenience, the terminating delimiter character is stuffed into
11544    SvIVX of the SV.
11545 */
11546
11547 STATIC char *
11548 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
11549 {
11550     dVAR;
11551     SV *sv;                             /* scalar value: string */
11552     const char *tmps;                   /* temp string, used for delimiter matching */
11553     register char *s = start;           /* current position in the buffer */
11554     register char term;                 /* terminating character */
11555     register char *to;                  /* current position in the sv's data */
11556     I32 brackets = 1;                   /* bracket nesting level */
11557     bool has_utf8 = FALSE;              /* is there any utf8 content? */
11558     I32 termcode;                       /* terminating char. code */
11559     U8 termstr[UTF8_MAXBYTES];          /* terminating string */
11560     STRLEN termlen;                     /* length of terminating string */
11561     int last_off = 0;                   /* last position for nesting bracket */
11562 #ifdef PERL_MAD
11563     int stuffstart;
11564     char *tstart;
11565 #endif
11566
11567     /* skip space before the delimiter */
11568     if (isSPACE(*s)) {
11569         s = PEEKSPACE(s);
11570     }
11571
11572 #ifdef PERL_MAD
11573     if (PL_realtokenstart >= 0) {
11574         stuffstart = PL_realtokenstart;
11575         PL_realtokenstart = -1;
11576     }
11577     else
11578         stuffstart = start - SvPVX(PL_linestr);
11579 #endif
11580     /* mark where we are, in case we need to report errors */
11581     CLINE;
11582
11583     /* after skipping whitespace, the next character is the terminator */
11584     term = *s;
11585     if (!UTF) {
11586         termcode = termstr[0] = term;
11587         termlen = 1;
11588     }
11589     else {
11590         termcode = utf8_to_uvchr((U8*)s, &termlen);
11591         Copy(s, termstr, termlen, U8);
11592         if (!UTF8_IS_INVARIANT(term))
11593             has_utf8 = TRUE;
11594     }
11595
11596     /* mark where we are */
11597     PL_multi_start = CopLINE(PL_curcop);
11598     PL_multi_open = term;
11599
11600     /* find corresponding closing delimiter */
11601     if (term && (tmps = strchr("([{< )]}> )]}>",term)))
11602         termcode = termstr[0] = term = tmps[5];
11603
11604     PL_multi_close = term;
11605
11606     /* create a new SV to hold the contents.  79 is the SV's initial length.
11607        What a random number. */
11608     sv = newSV_type(SVt_PVIV);
11609     SvGROW(sv, 80);
11610     SvIV_set(sv, termcode);
11611     (void)SvPOK_only(sv);               /* validate pointer */
11612
11613     /* move past delimiter and try to read a complete string */
11614     if (keep_delims)
11615         sv_catpvn(sv, s, termlen);
11616     s += termlen;
11617 #ifdef PERL_MAD
11618     tstart = SvPVX(PL_linestr) + stuffstart;
11619     if (!PL_thisopen && !keep_delims) {
11620         PL_thisopen = newSVpvn(tstart, s - tstart);
11621         stuffstart = s - SvPVX(PL_linestr);
11622     }
11623 #endif
11624     for (;;) {
11625         if (PL_encoding && !UTF) {
11626             bool cont = TRUE;
11627
11628             while (cont) {
11629                 int offset = s - SvPVX_const(PL_linestr);
11630                 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
11631                                            &offset, (char*)termstr, termlen);
11632                 const char * const ns = SvPVX_const(PL_linestr) + offset;
11633                 char * const svlast = SvEND(sv) - 1;
11634
11635                 for (; s < ns; s++) {
11636                     if (*s == '\n' && !PL_rsfp)
11637                         CopLINE_inc(PL_curcop);
11638                 }
11639                 if (!found)
11640                     goto read_more_line;
11641                 else {
11642                     /* handle quoted delimiters */
11643                     if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
11644                         const char *t;
11645                         for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
11646                             t--;
11647                         if ((svlast-1 - t) % 2) {
11648                             if (!keep_quoted) {
11649                                 *(svlast-1) = term;
11650                                 *svlast = '\0';
11651                                 SvCUR_set(sv, SvCUR(sv) - 1);
11652                             }
11653                             continue;
11654                         }
11655                     }
11656                     if (PL_multi_open == PL_multi_close) {
11657                         cont = FALSE;
11658                     }
11659                     else {
11660                         const char *t;
11661                         char *w;
11662                         for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
11663                             /* At here, all closes are "was quoted" one,
11664                                so we don't check PL_multi_close. */
11665                             if (*t == '\\') {
11666                                 if (!keep_quoted && *(t+1) == PL_multi_open)
11667                                     t++;
11668                                 else
11669                                     *w++ = *t++;
11670                             }
11671                             else if (*t == PL_multi_open)
11672                                 brackets++;
11673
11674                             *w = *t;
11675                         }
11676                         if (w < t) {
11677                             *w++ = term;
11678                             *w = '\0';
11679                             SvCUR_set(sv, w - SvPVX_const(sv));
11680                         }
11681                         last_off = w - SvPVX(sv);
11682                         if (--brackets <= 0)
11683                             cont = FALSE;
11684                     }
11685                 }
11686             }
11687             if (!keep_delims) {
11688                 SvCUR_set(sv, SvCUR(sv) - 1);
11689                 *SvEND(sv) = '\0';
11690             }
11691             break;
11692         }
11693
11694         /* extend sv if need be */
11695         SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
11696         /* set 'to' to the next character in the sv's string */
11697         to = SvPVX(sv)+SvCUR(sv);
11698
11699         /* if open delimiter is the close delimiter read unbridle */
11700         if (PL_multi_open == PL_multi_close) {
11701             for (; s < PL_bufend; s++,to++) {
11702                 /* embedded newlines increment the current line number */
11703                 if (*s == '\n' && !PL_rsfp)
11704                     CopLINE_inc(PL_curcop);
11705                 /* handle quoted delimiters */
11706                 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
11707                     if (!keep_quoted && s[1] == term)
11708                         s++;
11709                 /* any other quotes are simply copied straight through */
11710                     else
11711                         *to++ = *s++;
11712                 }
11713                 /* terminate when run out of buffer (the for() condition), or
11714                    have found the terminator */
11715                 else if (*s == term) {
11716                     if (termlen == 1)
11717                         break;
11718                     if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
11719                         break;
11720                 }
11721                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
11722                     has_utf8 = TRUE;
11723                 *to = *s;
11724             }
11725         }
11726
11727         /* if the terminator isn't the same as the start character (e.g.,
11728            matched brackets), we have to allow more in the quoting, and
11729            be prepared for nested brackets.
11730         */
11731         else {
11732             /* read until we run out of string, or we find the terminator */
11733             for (; s < PL_bufend; s++,to++) {
11734                 /* embedded newlines increment the line count */
11735                 if (*s == '\n' && !PL_rsfp)
11736                     CopLINE_inc(PL_curcop);
11737                 /* backslashes can escape the open or closing characters */
11738                 if (*s == '\\' && s+1 < PL_bufend) {
11739                     if (!keep_quoted &&
11740                         ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
11741                         s++;
11742                     else
11743                         *to++ = *s++;
11744                 }
11745                 /* allow nested opens and closes */
11746                 else if (*s == PL_multi_close && --brackets <= 0)
11747                     break;
11748                 else if (*s == PL_multi_open)
11749                     brackets++;
11750                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
11751                     has_utf8 = TRUE;
11752                 *to = *s;
11753             }
11754         }
11755         /* terminate the copied string and update the sv's end-of-string */
11756         *to = '\0';
11757         SvCUR_set(sv, to - SvPVX_const(sv));
11758
11759         /*
11760          * this next chunk reads more into the buffer if we're not done yet
11761          */
11762
11763         if (s < PL_bufend)
11764             break;              /* handle case where we are done yet :-) */
11765
11766 #ifndef PERL_STRICT_CR
11767         if (to - SvPVX_const(sv) >= 2) {
11768             if ((to[-2] == '\r' && to[-1] == '\n') ||
11769                 (to[-2] == '\n' && to[-1] == '\r'))
11770             {
11771                 to[-2] = '\n';
11772                 to--;
11773                 SvCUR_set(sv, to - SvPVX_const(sv));
11774             }
11775             else if (to[-1] == '\r')
11776                 to[-1] = '\n';
11777         }
11778         else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
11779             to[-1] = '\n';
11780 #endif
11781
11782      read_more_line:
11783         /* if we're out of file, or a read fails, bail and reset the current
11784            line marker so we can report where the unterminated string began
11785         */
11786 #ifdef PERL_MAD
11787         if (PL_madskills) {
11788             char * const tstart = SvPVX(PL_linestr) + stuffstart;
11789             if (PL_thisstuff)
11790                 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
11791             else
11792                 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
11793         }
11794 #endif
11795         if (!PL_rsfp ||
11796          !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
11797             sv_free(sv);
11798             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11799             return NULL;
11800         }
11801 #ifdef PERL_MAD
11802         stuffstart = 0;
11803 #endif
11804         /* we read a line, so increment our line counter */
11805         CopLINE_inc(PL_curcop);
11806
11807         /* update debugger info */
11808         if (PERLDB_LINE && PL_curstash != PL_debstash)
11809             update_debugger_info(PL_linestr, NULL, 0);
11810
11811         /* having changed the buffer, we must update PL_bufend */
11812         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11813         PL_last_lop = PL_last_uni = NULL;
11814     }
11815
11816     /* at this point, we have successfully read the delimited string */
11817
11818     if (!PL_encoding || UTF) {
11819 #ifdef PERL_MAD
11820         if (PL_madskills) {
11821             char * const tstart = SvPVX(PL_linestr) + stuffstart;
11822             const int len = s - tstart;
11823             if (PL_thisstuff)
11824                 sv_catpvn(PL_thisstuff, tstart, len);
11825             else
11826                 PL_thisstuff = newSVpvn(tstart, len);
11827             if (!PL_thisclose && !keep_delims)
11828                 PL_thisclose = newSVpvn(s,termlen);
11829         }
11830 #endif
11831
11832         if (keep_delims)
11833             sv_catpvn(sv, s, termlen);
11834         s += termlen;
11835     }
11836 #ifdef PERL_MAD
11837     else {
11838         if (PL_madskills) {
11839             char * const tstart = SvPVX(PL_linestr) + stuffstart;
11840             const int len = s - tstart - termlen;
11841             if (PL_thisstuff)
11842                 sv_catpvn(PL_thisstuff, tstart, len);
11843             else
11844                 PL_thisstuff = newSVpvn(tstart, len);
11845             if (!PL_thisclose && !keep_delims)
11846                 PL_thisclose = newSVpvn(s - termlen,termlen);
11847         }
11848     }
11849 #endif
11850     if (has_utf8 || PL_encoding)
11851         SvUTF8_on(sv);
11852
11853     PL_multi_end = CopLINE(PL_curcop);
11854
11855     /* if we allocated too much space, give some back */
11856     if (SvCUR(sv) + 5 < SvLEN(sv)) {
11857         SvLEN_set(sv, SvCUR(sv) + 1);
11858         SvPV_renew(sv, SvLEN(sv));
11859     }
11860
11861     /* decide whether this is the first or second quoted string we've read
11862        for this op
11863     */
11864
11865     if (PL_lex_stuff)
11866         PL_lex_repl = sv;
11867     else
11868         PL_lex_stuff = sv;
11869     return s;
11870 }
11871
11872 /*
11873   scan_num
11874   takes: pointer to position in buffer
11875   returns: pointer to new position in buffer
11876   side-effects: builds ops for the constant in yylval.op
11877
11878   Read a number in any of the formats that Perl accepts:
11879
11880   \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*)      12 12.34 12.
11881   \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*)                     .34
11882   0b[01](_?[01])*
11883   0[0-7](_?[0-7])*
11884   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
11885
11886   Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
11887   thing it reads.
11888
11889   If it reads a number without a decimal point or an exponent, it will
11890   try converting the number to an integer and see if it can do so
11891   without loss of precision.
11892 */
11893
11894 char *
11895 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
11896 {
11897     dVAR;
11898     register const char *s = start;     /* current position in buffer */
11899     register char *d;                   /* destination in temp buffer */
11900     register char *e;                   /* end of temp buffer */
11901     NV nv;                              /* number read, as a double */
11902     SV *sv = NULL;                      /* place to put the converted number */
11903     bool floatit;                       /* boolean: int or float? */
11904     const char *lastub = NULL;          /* position of last underbar */
11905     static char const number_too_long[] = "Number too long";
11906
11907     /* We use the first character to decide what type of number this is */
11908
11909     switch (*s) {
11910     default:
11911       Perl_croak(aTHX_ "panic: scan_num");
11912
11913     /* if it starts with a 0, it could be an octal number, a decimal in
11914        0.13 disguise, or a hexadecimal number, or a binary number. */
11915     case '0':
11916         {
11917           /* variables:
11918              u          holds the "number so far"
11919              shift      the power of 2 of the base
11920                         (hex == 4, octal == 3, binary == 1)
11921              overflowed was the number more than we can hold?
11922
11923              Shift is used when we add a digit.  It also serves as an "are
11924              we in octal/hex/binary?" indicator to disallow hex characters
11925              when in octal mode.
11926            */
11927             NV n = 0.0;
11928             UV u = 0;
11929             I32 shift;
11930             bool overflowed = FALSE;
11931             bool just_zero  = TRUE;     /* just plain 0 or binary number? */
11932             static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
11933             static const char* const bases[5] =
11934               { "", "binary", "", "octal", "hexadecimal" };
11935             static const char* const Bases[5] =
11936               { "", "Binary", "", "Octal", "Hexadecimal" };
11937             static const char* const maxima[5] =
11938               { "",
11939                 "0b11111111111111111111111111111111",
11940                 "",
11941                 "037777777777",
11942                 "0xffffffff" };
11943             const char *base, *Base, *max;
11944
11945             /* check for hex */
11946             if (s[1] == 'x') {
11947                 shift = 4;
11948                 s += 2;
11949                 just_zero = FALSE;
11950             } else if (s[1] == 'b') {
11951                 shift = 1;
11952                 s += 2;
11953                 just_zero = FALSE;
11954             }
11955             /* check for a decimal in disguise */
11956             else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
11957                 goto decimal;
11958             /* so it must be octal */
11959             else {
11960                 shift = 3;
11961                 s++;
11962             }
11963
11964             if (*s == '_') {
11965                if (ckWARN(WARN_SYNTAX))
11966                    Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
11967                                "Misplaced _ in number");
11968                lastub = s++;
11969             }
11970
11971             base = bases[shift];
11972             Base = Bases[shift];
11973             max  = maxima[shift];
11974
11975             /* read the rest of the number */
11976             for (;;) {
11977                 /* x is used in the overflow test,
11978                    b is the digit we're adding on. */
11979                 UV x, b;
11980
11981                 switch (*s) {
11982
11983                 /* if we don't mention it, we're done */
11984                 default:
11985                     goto out;
11986
11987                 /* _ are ignored -- but warned about if consecutive */
11988                 case '_':
11989                     if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX))
11990                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
11991                                     "Misplaced _ in number");
11992                     lastub = s++;
11993                     break;
11994
11995                 /* 8 and 9 are not octal */
11996                 case '8': case '9':
11997                     if (shift == 3)
11998                         yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
11999                     /* FALL THROUGH */
12000
12001                 /* octal digits */
12002                 case '2': case '3': case '4':
12003                 case '5': case '6': case '7':
12004                     if (shift == 1)
12005                         yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
12006                     /* FALL THROUGH */
12007
12008                 case '0': case '1':
12009                     b = *s++ & 15;              /* ASCII digit -> value of digit */
12010                     goto digit;
12011
12012                 /* hex digits */
12013                 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
12014                 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
12015                     /* make sure they said 0x */
12016                     if (shift != 4)
12017                         goto out;
12018                     b = (*s++ & 7) + 9;
12019
12020                     /* Prepare to put the digit we have onto the end
12021                        of the number so far.  We check for overflows.
12022                     */
12023
12024                   digit:
12025                     just_zero = FALSE;
12026                     if (!overflowed) {
12027                         x = u << shift; /* make room for the digit */
12028
12029                         if ((x >> shift) != u
12030                             && !(PL_hints & HINT_NEW_BINARY)) {
12031                             overflowed = TRUE;
12032                             n = (NV) u;
12033                             if (ckWARN_d(WARN_OVERFLOW))
12034                                 Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
12035                                             "Integer overflow in %s number",
12036                                             base);
12037                         } else
12038                             u = x | b;          /* add the digit to the end */
12039                     }
12040                     if (overflowed) {
12041                         n *= nvshift[shift];
12042                         /* If an NV has not enough bits in its
12043                          * mantissa to represent an UV this summing of
12044                          * small low-order numbers is a waste of time
12045                          * (because the NV cannot preserve the
12046                          * low-order bits anyway): we could just
12047                          * remember when did we overflow and in the
12048                          * end just multiply n by the right
12049                          * amount. */
12050                         n += (NV) b;
12051                     }
12052                     break;
12053                 }
12054             }
12055
12056           /* if we get here, we had success: make a scalar value from
12057              the number.
12058           */
12059           out:
12060
12061             /* final misplaced underbar check */
12062             if (s[-1] == '_') {
12063                 if (ckWARN(WARN_SYNTAX))
12064                     Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
12065             }
12066
12067             sv = newSV(0);
12068             if (overflowed) {
12069                 if (n > 4294967295.0 && ckWARN(WARN_PORTABLE))
12070                     Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
12071                                 "%s number > %s non-portable",
12072                                 Base, max);
12073                 sv_setnv(sv, n);
12074             }
12075             else {
12076 #if UVSIZE > 4
12077                 if (u > 0xffffffff && ckWARN(WARN_PORTABLE))
12078                     Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
12079                                 "%s number > %s non-portable",
12080                                 Base, max);
12081 #endif
12082                 sv_setuv(sv, u);
12083             }
12084             if (just_zero && (PL_hints & HINT_NEW_INTEGER))
12085                 sv = new_constant(start, s - start, "integer",
12086                                   sv, NULL, NULL, 0);
12087             else if (PL_hints & HINT_NEW_BINARY)
12088                 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
12089         }
12090         break;
12091
12092     /*
12093       handle decimal numbers.
12094       we're also sent here when we read a 0 as the first digit
12095     */
12096     case '1': case '2': case '3': case '4': case '5':
12097     case '6': case '7': case '8': case '9': case '.':
12098       decimal:
12099         d = PL_tokenbuf;
12100         e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
12101         floatit = FALSE;
12102
12103         /* read next group of digits and _ and copy into d */
12104         while (isDIGIT(*s) || *s == '_') {
12105             /* skip underscores, checking for misplaced ones
12106                if -w is on
12107             */
12108             if (*s == '_') {
12109                 if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX))
12110                     Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12111                                 "Misplaced _ in number");
12112                 lastub = s++;
12113             }
12114             else {
12115                 /* check for end of fixed-length buffer */
12116                 if (d >= e)
12117                     Perl_croak(aTHX_ number_too_long);
12118                 /* if we're ok, copy the character */
12119                 *d++ = *s++;
12120             }
12121         }
12122
12123         /* final misplaced underbar check */
12124         if (lastub && s == lastub + 1) {
12125             if (ckWARN(WARN_SYNTAX))
12126                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
12127         }
12128
12129         /* read a decimal portion if there is one.  avoid
12130            3..5 being interpreted as the number 3. followed
12131            by .5
12132         */
12133         if (*s == '.' && s[1] != '.') {
12134             floatit = TRUE;
12135             *d++ = *s++;
12136
12137             if (*s == '_') {
12138                 if (ckWARN(WARN_SYNTAX))
12139                     Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12140                                 "Misplaced _ in number");
12141                 lastub = s;
12142             }
12143
12144             /* copy, ignoring underbars, until we run out of digits.
12145             */
12146             for (; isDIGIT(*s) || *s == '_'; s++) {
12147                 /* fixed length buffer check */
12148                 if (d >= e)
12149                     Perl_croak(aTHX_ number_too_long);
12150                 if (*s == '_') {
12151                    if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX))
12152                        Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12153                                    "Misplaced _ in number");
12154                    lastub = s;
12155                 }
12156                 else
12157                     *d++ = *s;
12158             }
12159             /* fractional part ending in underbar? */
12160             if (s[-1] == '_') {
12161                 if (ckWARN(WARN_SYNTAX))
12162                     Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12163                                 "Misplaced _ in number");
12164             }
12165             if (*s == '.' && isDIGIT(s[1])) {
12166                 /* oops, it's really a v-string, but without the "v" */
12167                 s = start;
12168                 goto vstring;
12169             }
12170         }
12171
12172         /* read exponent part, if present */
12173         if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
12174             floatit = TRUE;
12175             s++;
12176
12177             /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
12178             *d++ = 'e';         /* At least some Mach atof()s don't grok 'E' */
12179
12180             /* stray preinitial _ */
12181             if (*s == '_') {
12182                 if (ckWARN(WARN_SYNTAX))
12183                     Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12184                                 "Misplaced _ in number");
12185                 lastub = s++;
12186             }
12187
12188             /* allow positive or negative exponent */
12189             if (*s == '+' || *s == '-')
12190                 *d++ = *s++;
12191
12192             /* stray initial _ */
12193             if (*s == '_') {
12194                 if (ckWARN(WARN_SYNTAX))
12195                     Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12196                                 "Misplaced _ in number");
12197                 lastub = s++;
12198             }
12199
12200             /* read digits of exponent */
12201             while (isDIGIT(*s) || *s == '_') {
12202                 if (isDIGIT(*s)) {
12203                     if (d >= e)
12204                         Perl_croak(aTHX_ number_too_long);
12205                     *d++ = *s++;
12206                 }
12207                 else {
12208                    if (((lastub && s == lastub + 1) ||
12209                         (!isDIGIT(s[1]) && s[1] != '_'))
12210                     && ckWARN(WARN_SYNTAX))
12211                        Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12212                                    "Misplaced _ in number");
12213                    lastub = s++;
12214                 }
12215             }
12216         }
12217
12218
12219         /* make an sv from the string */
12220         sv = newSV(0);
12221
12222         /*
12223            We try to do an integer conversion first if no characters
12224            indicating "float" have been found.
12225          */
12226
12227         if (!floatit) {
12228             UV uv;
12229             const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
12230
12231             if (flags == IS_NUMBER_IN_UV) {
12232               if (uv <= IV_MAX)
12233                 sv_setiv(sv, uv); /* Prefer IVs over UVs. */
12234               else
12235                 sv_setuv(sv, uv);
12236             } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
12237               if (uv <= (UV) IV_MIN)
12238                 sv_setiv(sv, -(IV)uv);
12239               else
12240                 floatit = TRUE;
12241             } else
12242               floatit = TRUE;
12243         }
12244         if (floatit) {
12245             /* terminate the string */
12246             *d = '\0';
12247             nv = Atof(PL_tokenbuf);
12248             sv_setnv(sv, nv);
12249         }
12250
12251         if ( floatit
12252              ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
12253             const char *const key = floatit ? "float" : "integer";
12254             const STRLEN keylen = floatit ? 5 : 7;
12255             sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
12256                                 key, keylen, sv, NULL, NULL, 0);
12257         }
12258         break;
12259
12260     /* if it starts with a v, it could be a v-string */
12261     case 'v':
12262 vstring:
12263                 sv = newSV(5); /* preallocate storage space */
12264                 s = scan_vstring(s, PL_bufend, sv);
12265         break;
12266     }
12267
12268     /* make the op for the constant and return */
12269
12270     if (sv)
12271         lvalp->opval = newSVOP(OP_CONST, 0, sv);
12272     else
12273         lvalp->opval = NULL;
12274
12275     return (char *)s;
12276 }
12277
12278 STATIC char *
12279 S_scan_formline(pTHX_ register char *s)
12280 {
12281     dVAR;
12282     register char *eol;
12283     register char *t;
12284     SV * const stuff = newSVpvs("");
12285     bool needargs = FALSE;
12286     bool eofmt = FALSE;
12287 #ifdef PERL_MAD
12288     char *tokenstart = s;
12289     SV* savewhite;
12290
12291     if (PL_madskills) {
12292         savewhite = PL_thiswhite;
12293         PL_thiswhite = 0;
12294     }
12295 #endif
12296
12297     while (!needargs) {
12298         if (*s == '.') {
12299             t = s+1;
12300 #ifdef PERL_STRICT_CR
12301             while (SPACE_OR_TAB(*t))
12302                 t++;
12303 #else
12304             while (SPACE_OR_TAB(*t) || *t == '\r')
12305                 t++;
12306 #endif
12307             if (*t == '\n' || t == PL_bufend) {
12308                 eofmt = TRUE;
12309                 break;
12310             }
12311         }
12312         if (PL_in_eval && !PL_rsfp) {
12313             eol = (char *) memchr(s,'\n',PL_bufend-s);
12314             if (!eol++)
12315                 eol = PL_bufend;
12316         }
12317         else
12318             eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
12319         if (*s != '#') {
12320             for (t = s; t < eol; t++) {
12321                 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
12322                     needargs = FALSE;
12323                     goto enough;        /* ~~ must be first line in formline */
12324                 }
12325                 if (*t == '@' || *t == '^')
12326                     needargs = TRUE;
12327             }
12328             if (eol > s) {
12329                 sv_catpvn(stuff, s, eol-s);
12330 #ifndef PERL_STRICT_CR
12331                 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
12332                     char *end = SvPVX(stuff) + SvCUR(stuff);
12333                     end[-2] = '\n';
12334                     end[-1] = '\0';
12335                     SvCUR_set(stuff, SvCUR(stuff) - 1);
12336                 }
12337 #endif
12338             }
12339             else
12340               break;
12341         }
12342         s = (char*)eol;
12343         if (PL_rsfp) {
12344 #ifdef PERL_MAD
12345             if (PL_madskills) {
12346                 if (PL_thistoken)
12347                     sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
12348                 else
12349                     PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
12350             }
12351 #endif
12352             s = filter_gets(PL_linestr, PL_rsfp, 0);
12353 #ifdef PERL_MAD
12354             tokenstart = PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
12355 #else
12356             PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
12357 #endif
12358             PL_bufend = PL_bufptr + SvCUR(PL_linestr);
12359             PL_last_lop = PL_last_uni = NULL;
12360             if (!s) {
12361                 s = PL_bufptr;
12362                 break;
12363             }
12364         }
12365         incline(s);
12366     }
12367   enough:
12368     if (SvCUR(stuff)) {
12369         PL_expect = XTERM;
12370         if (needargs) {
12371             PL_lex_state = LEX_NORMAL;
12372             start_force(PL_curforce);
12373             NEXTVAL_NEXTTOKE.ival = 0;
12374             force_next(',');
12375         }
12376         else
12377             PL_lex_state = LEX_FORMLINE;
12378         if (!IN_BYTES) {
12379             if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
12380                 SvUTF8_on(stuff);
12381             else if (PL_encoding)
12382                 sv_recode_to_utf8(stuff, PL_encoding);
12383         }
12384         start_force(PL_curforce);
12385         NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
12386         force_next(THING);
12387         start_force(PL_curforce);
12388         NEXTVAL_NEXTTOKE.ival = OP_FORMLINE;
12389         force_next(LSTOP);
12390     }
12391     else {
12392         SvREFCNT_dec(stuff);
12393         if (eofmt)
12394             PL_lex_formbrack = 0;
12395         PL_bufptr = s;
12396     }
12397 #ifdef PERL_MAD
12398     if (PL_madskills) {
12399         if (PL_thistoken)
12400             sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
12401         else
12402             PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
12403         PL_thiswhite = savewhite;
12404     }
12405 #endif
12406     return s;
12407 }
12408
12409 I32
12410 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
12411 {
12412     dVAR;
12413     const I32 oldsavestack_ix = PL_savestack_ix;
12414     CV* const outsidecv = PL_compcv;
12415
12416     if (PL_compcv) {
12417         assert(SvTYPE(PL_compcv) == SVt_PVCV);
12418     }
12419     SAVEI32(PL_subline);
12420     save_item(PL_subname);
12421     SAVESPTR(PL_compcv);
12422
12423     PL_compcv = (CV*)newSV_type(is_format ? SVt_PVFM : SVt_PVCV);
12424     CvFLAGS(PL_compcv) |= flags;
12425
12426     PL_subline = CopLINE(PL_curcop);
12427     CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
12428     CvOUTSIDE(PL_compcv) = (CV*)SvREFCNT_inc_simple(outsidecv);
12429     CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
12430
12431     return oldsavestack_ix;
12432 }
12433
12434 #ifdef __SC__
12435 #pragma segment Perl_yylex
12436 #endif
12437 int
12438 Perl_yywarn(pTHX_ const char *s)
12439 {
12440     dVAR;
12441     PL_in_eval |= EVAL_WARNONLY;
12442     yyerror(s);
12443     PL_in_eval &= ~EVAL_WARNONLY;
12444     return 0;
12445 }
12446
12447 int
12448 Perl_yyerror(pTHX_ const char *s)
12449 {
12450     dVAR;
12451     const char *where = NULL;
12452     const char *context = NULL;
12453     int contlen = -1;
12454     SV *msg;
12455     int yychar  = PL_parser->yychar;
12456
12457     if (!yychar || (yychar == ';' && !PL_rsfp))
12458         where = "at EOF";
12459     else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
12460       PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
12461       PL_oldbufptr != PL_bufptr) {
12462         /*
12463                 Only for NetWare:
12464                 The code below is removed for NetWare because it abends/crashes on NetWare
12465                 when the script has error such as not having the closing quotes like:
12466                     if ($var eq "value)
12467                 Checking of white spaces is anyway done in NetWare code.
12468         */
12469 #ifndef NETWARE
12470         while (isSPACE(*PL_oldoldbufptr))
12471             PL_oldoldbufptr++;
12472 #endif
12473         context = PL_oldoldbufptr;
12474         contlen = PL_bufptr - PL_oldoldbufptr;
12475     }
12476     else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
12477       PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
12478         /*
12479                 Only for NetWare:
12480                 The code below is removed for NetWare because it abends/crashes on NetWare
12481                 when the script has error such as not having the closing quotes like:
12482                     if ($var eq "value)
12483                 Checking of white spaces is anyway done in NetWare code.
12484         */
12485 #ifndef NETWARE
12486         while (isSPACE(*PL_oldbufptr))
12487             PL_oldbufptr++;
12488 #endif
12489         context = PL_oldbufptr;
12490         contlen = PL_bufptr - PL_oldbufptr;
12491     }
12492     else if (yychar > 255)
12493         where = "next token ???";
12494     else if (yychar == -2) { /* YYEMPTY */
12495         if (PL_lex_state == LEX_NORMAL ||
12496            (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
12497             where = "at end of line";
12498         else if (PL_lex_inpat)
12499             where = "within pattern";
12500         else
12501             where = "within string";
12502     }
12503     else {
12504         SV * const where_sv = sv_2mortal(newSVpvs("next char "));
12505         if (yychar < 32)
12506             Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
12507         else if (isPRINT_LC(yychar)) {
12508             const char string = yychar;
12509             sv_catpvn(where_sv, &string, 1);
12510         }
12511         else
12512             Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
12513         where = SvPVX_const(where_sv);
12514     }
12515     msg = sv_2mortal(newSVpv(s, 0));
12516     Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
12517         OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
12518     if (context)
12519         Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
12520     else
12521         Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
12522     if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
12523         Perl_sv_catpvf(aTHX_ msg,
12524         "  (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
12525                 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
12526         PL_multi_end = 0;
12527     }
12528     if (PL_in_eval & EVAL_WARNONLY) {
12529         if (ckWARN_d(WARN_SYNTAX))
12530             Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
12531     }
12532     else
12533         qerror(msg);
12534     if (PL_error_count >= 10) {
12535         if (PL_in_eval && SvCUR(ERRSV))
12536             Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
12537                        SVfARG(ERRSV), OutCopFILE(PL_curcop));
12538         else
12539             Perl_croak(aTHX_ "%s has too many errors.\n",
12540             OutCopFILE(PL_curcop));
12541     }
12542     PL_in_my = 0;
12543     PL_in_my_stash = NULL;
12544     return 0;
12545 }
12546 #ifdef __SC__
12547 #pragma segment Main
12548 #endif
12549
12550 STATIC char*
12551 S_swallow_bom(pTHX_ U8 *s)
12552 {
12553     dVAR;
12554     const STRLEN slen = SvCUR(PL_linestr);
12555     switch (s[0]) {
12556     case 0xFF:
12557         if (s[1] == 0xFE) {
12558             /* UTF-16 little-endian? (or UTF32-LE?) */
12559             if (s[2] == 0 && s[3] == 0)  /* UTF-32 little-endian */
12560                 Perl_croak(aTHX_ "Unsupported script encoding UTF32-LE");
12561 #ifndef PERL_NO_UTF16_FILTER
12562             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF16-LE script encoding (BOM)\n");
12563             s += 2;
12564         utf16le:
12565             if (PL_bufend > (char*)s) {
12566                 U8 *news;
12567                 I32 newlen;
12568
12569                 filter_add(utf16rev_textfilter, NULL);
12570                 Newx(news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
12571                 utf16_to_utf8_reversed(s, news,
12572                                        PL_bufend - (char*)s - 1,
12573                                        &newlen);
12574                 sv_setpvn(PL_linestr, (const char*)news, newlen);
12575 #ifdef PERL_MAD
12576                 s = (U8*)SvPVX(PL_linestr);
12577                 Copy(news, s, newlen, U8);
12578                 s[newlen] = '\0';
12579 #endif
12580                 Safefree(news);
12581                 SvUTF8_on(PL_linestr);
12582                 s = (U8*)SvPVX(PL_linestr);
12583 #ifdef PERL_MAD
12584                 /* FIXME - is this a general bug fix?  */
12585                 s[newlen] = '\0';
12586 #endif
12587                 PL_bufend = SvPVX(PL_linestr) + newlen;
12588             }
12589 #else
12590             Perl_croak(aTHX_ "Unsupported script encoding UTF16-LE");
12591 #endif
12592         }
12593         break;
12594     case 0xFE:
12595         if (s[1] == 0xFF) {   /* UTF-16 big-endian? */
12596 #ifndef PERL_NO_UTF16_FILTER
12597             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
12598             s += 2;
12599         utf16be:
12600             if (PL_bufend > (char *)s) {
12601                 U8 *news;
12602                 I32 newlen;
12603
12604                 filter_add(utf16_textfilter, NULL);
12605                 Newx(news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
12606                 utf16_to_utf8(s, news,
12607                               PL_bufend - (char*)s,
12608                               &newlen);
12609                 sv_setpvn(PL_linestr, (const char*)news, newlen);
12610                 Safefree(news);
12611                 SvUTF8_on(PL_linestr);
12612                 s = (U8*)SvPVX(PL_linestr);
12613                 PL_bufend = SvPVX(PL_linestr) + newlen;
12614             }
12615 #else
12616             Perl_croak(aTHX_ "Unsupported script encoding UTF16-BE");
12617 #endif
12618         }
12619         break;
12620     case 0xEF:
12621         if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
12622             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
12623             s += 3;                      /* UTF-8 */
12624         }
12625         break;
12626     case 0:
12627         if (slen > 3) {
12628              if (s[1] == 0) {
12629                   if (s[2] == 0xFE && s[3] == 0xFF) {
12630                        /* UTF-32 big-endian */
12631                        Perl_croak(aTHX_ "Unsupported script encoding UTF32-BE");
12632                   }
12633              }
12634              else if (s[2] == 0 && s[3] != 0) {
12635                   /* Leading bytes
12636                    * 00 xx 00 xx
12637                    * are a good indicator of UTF-16BE. */
12638                   if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
12639                   goto utf16be;
12640              }
12641         }
12642 #ifdef EBCDIC
12643     case 0xDD:
12644         if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
12645             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
12646             s += 4;                      /* UTF-8 */
12647         }
12648         break;
12649 #endif
12650
12651     default:
12652          if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
12653                   /* Leading bytes
12654                    * xx 00 xx 00
12655                    * are a good indicator of UTF-16LE. */
12656               if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
12657               goto utf16le;
12658          }
12659     }
12660     return (char*)s;
12661 }
12662
12663
12664 #ifndef PERL_NO_UTF16_FILTER
12665 static I32
12666 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
12667 {
12668     dVAR;
12669     const STRLEN old = SvCUR(sv);
12670     const I32 count = FILTER_READ(idx+1, sv, maxlen);
12671     DEBUG_P(PerlIO_printf(Perl_debug_log,
12672                           "utf16_textfilter(%p): %d %d (%d)\n",
12673                           FPTR2DPTR(void *, utf16_textfilter),
12674                           idx, maxlen, (int) count));
12675     if (count) {
12676         U8* tmps;
12677         I32 newlen;
12678         Newx(tmps, SvCUR(sv) * 3 / 2 + 1, U8);
12679         Copy(SvPVX_const(sv), tmps, old, char);
12680         utf16_to_utf8((U8*)SvPVX_const(sv) + old, tmps + old,
12681                       SvCUR(sv) - old, &newlen);
12682         sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
12683     }
12684     DEBUG_P({sv_dump(sv);});
12685     return SvCUR(sv);
12686 }
12687
12688 static I32
12689 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen)
12690 {
12691     dVAR;
12692     const STRLEN old = SvCUR(sv);
12693     const I32 count = FILTER_READ(idx+1, sv, maxlen);
12694     DEBUG_P(PerlIO_printf(Perl_debug_log,
12695                           "utf16rev_textfilter(%p): %d %d (%d)\n",
12696                           FPTR2DPTR(void *, utf16rev_textfilter),
12697                           idx, maxlen, (int) count));
12698     if (count) {
12699         U8* tmps;
12700         I32 newlen;
12701         Newx(tmps, SvCUR(sv) * 3 / 2 + 1, U8);
12702         Copy(SvPVX_const(sv), tmps, old, char);
12703         utf16_to_utf8((U8*)SvPVX_const(sv) + old, tmps + old,
12704                       SvCUR(sv) - old, &newlen);
12705         sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
12706     }
12707     DEBUG_P({ sv_dump(sv); });
12708     return count;
12709 }
12710 #endif
12711
12712 /*
12713 Returns a pointer to the next character after the parsed
12714 vstring, as well as updating the passed in sv.
12715
12716 Function must be called like
12717
12718         sv = newSV(5);
12719         s = scan_vstring(s,e,sv);
12720
12721 where s and e are the start and end of the string.
12722 The sv should already be large enough to store the vstring
12723 passed in, for performance reasons.
12724
12725 */
12726
12727 char *
12728 Perl_scan_vstring(pTHX_ const char *s, const char *e, SV *sv)
12729 {
12730     dVAR;
12731     const char *pos = s;
12732     const char *start = s;
12733     if (*pos == 'v') pos++;  /* get past 'v' */
12734     while (pos < e && (isDIGIT(*pos) || *pos == '_'))
12735         pos++;
12736     if ( *pos != '.') {
12737         /* this may not be a v-string if followed by => */
12738         const char *next = pos;
12739         while (next < e && isSPACE(*next))
12740             ++next;
12741         if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
12742             /* return string not v-string */
12743             sv_setpvn(sv,(char *)s,pos-s);
12744             return (char *)pos;
12745         }
12746     }
12747
12748     if (!isALPHA(*pos)) {
12749         U8 tmpbuf[UTF8_MAXBYTES+1];
12750
12751         if (*s == 'v')
12752             s++;  /* get past 'v' */
12753
12754         sv_setpvn(sv, "", 0);
12755
12756         for (;;) {
12757             /* this is atoi() that tolerates underscores */
12758             U8 *tmpend;
12759             UV rev = 0;
12760             const char *end = pos;
12761             UV mult = 1;
12762             while (--end >= s) {
12763                 if (*end != '_') {
12764                     const UV orev = rev;
12765                     rev += (*end - '0') * mult;
12766                     mult *= 10;
12767                     if (orev > rev && ckWARN_d(WARN_OVERFLOW))
12768                         Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
12769                                     "Integer overflow in decimal number");
12770                 }
12771             }
12772 #ifdef EBCDIC
12773             if (rev > 0x7FFFFFFF)
12774                  Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
12775 #endif
12776             /* Append native character for the rev point */
12777             tmpend = uvchr_to_utf8(tmpbuf, rev);
12778             sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
12779             if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
12780                  SvUTF8_on(sv);
12781             if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
12782                  s = ++pos;
12783             else {
12784                  s = pos;
12785                  break;
12786             }
12787             while (pos < e && (isDIGIT(*pos) || *pos == '_'))
12788                  pos++;
12789         }
12790         SvPOK_on(sv);
12791         sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
12792         SvRMAGICAL_on(sv);
12793     }
12794     return (char *)s;
12795 }
12796
12797 /*
12798  * Local variables:
12799  * c-indentation-style: bsd
12800  * c-basic-offset: 4
12801  * indent-tabs-mode: t
12802  * End:
12803  *
12804  * ex: set ts=8 sts=4 sw=4 noet:
12805  */