toke.c

   1 /*    toke.c
   2  *
   3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   4  *    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
   5  *
   6  *    You may distribute under the terms of either the GNU General Public
   7  *    License or the Artistic License, as specified in the README file.
   8  *
   9  */
  10
  11 /*
  12  *  'It all comes from here, the stench and the peril.'    --Frodo
  13  *
  14  *     [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
  15  */
  16
  17 /*
  18  * This file is the lexer for Perl.  It's closely linked to the
  19  * parser, perly.y.
  20  *
  21  * The main routine is yylex(), which returns the next token.
  22  */
  23
  24 /*
  25 =head1 Lexer interface
  26
  27 This is the lower layer of the Perl parser, managing characters and tokens.
  28
  29 =for apidoc AmU|yy_parser *|PL_parser
  30
  31 Pointer to a structure encapsulating the state of the parsing operation
  32 currently in progress.  The pointer can be locally changed to perform
  33 a nested parse without interfering with the state of an outer parse.
  34 Individual members of C<PL_parser> have their own documentation.
  35
  36 =cut
  37 */
  38
  39 #include "EXTERN.h"
  40 #define PERL_IN_TOKE_C
  41 #include "perl.h"
  42
  43 #define new_constant(a,b,c,d,e,f,g)     \
  44         S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
  45
  46 #define pl_yylval       (PL_parser->yylval)
  47
  48 /* YYINITDEPTH -- initial size of the parser's stacks.  */
  49 #define YYINITDEPTH 200
  50
  51 /* XXX temporary backwards compatibility */
  52 #define PL_lex_brackets         (PL_parser->lex_brackets)
  53 #define PL_lex_brackstack       (PL_parser->lex_brackstack)
  54 #define PL_lex_casemods         (PL_parser->lex_casemods)
  55 #define PL_lex_casestack        (PL_parser->lex_casestack)
  56 #define PL_lex_defer            (PL_parser->lex_defer)
  57 #define PL_lex_dojoin           (PL_parser->lex_dojoin)
  58 #define PL_lex_expect           (PL_parser->lex_expect)
  59 #define PL_lex_formbrack        (PL_parser->lex_formbrack)
  60 #define PL_lex_inpat            (PL_parser->lex_inpat)
  61 #define PL_lex_inwhat           (PL_parser->lex_inwhat)
  62 #define PL_lex_op               (PL_parser->lex_op)
  63 #define PL_lex_repl             (PL_parser->lex_repl)
  64 #define PL_lex_starts           (PL_parser->lex_starts)
  65 #define PL_lex_stuff            (PL_parser->lex_stuff)
  66 #define PL_multi_start          (PL_parser->multi_start)
  67 #define PL_multi_open           (PL_parser->multi_open)
  68 #define PL_multi_close          (PL_parser->multi_close)
  69 #define PL_pending_ident        (PL_parser->pending_ident)
  70 #define PL_preambled            (PL_parser->preambled)
  71 #define PL_sublex_info          (PL_parser->sublex_info)
  72 #define PL_linestr              (PL_parser->linestr)
  73 #define PL_expect               (PL_parser->expect)
  74 #define PL_copline              (PL_parser->copline)
  75 #define PL_bufptr               (PL_parser->bufptr)
  76 #define PL_oldbufptr            (PL_parser->oldbufptr)
  77 #define PL_oldoldbufptr         (PL_parser->oldoldbufptr)
  78 #define PL_linestart            (PL_parser->linestart)
  79 #define PL_bufend               (PL_parser->bufend)
  80 #define PL_last_uni             (PL_parser->last_uni)
  81 #define PL_last_lop             (PL_parser->last_lop)
  82 #define PL_last_lop_op          (PL_parser->last_lop_op)
  83 #define PL_lex_state            (PL_parser->lex_state)
  84 #define PL_rsfp                 (PL_parser->rsfp)
  85 #define PL_rsfp_filters         (PL_parser->rsfp_filters)
  86 #define PL_in_my                (PL_parser->in_my)
  87 #define PL_in_my_stash          (PL_parser->in_my_stash)
  88 #define PL_tokenbuf             (PL_parser->tokenbuf)
  89 #define PL_multi_end            (PL_parser->multi_end)
  90 #define PL_error_count          (PL_parser->error_count)
  91
  92 #ifdef PERL_MAD
  93 #  define PL_endwhite           (PL_parser->endwhite)
  94 #  define PL_faketokens         (PL_parser->faketokens)
  95 #  define PL_lasttoke           (PL_parser->lasttoke)
  96 #  define PL_nextwhite          (PL_parser->nextwhite)
  97 #  define PL_realtokenstart     (PL_parser->realtokenstart)
  98 #  define PL_skipwhite          (PL_parser->skipwhite)
  99 #  define PL_thisclose          (PL_parser->thisclose)
 100 #  define PL_thismad            (PL_parser->thismad)
 101 #  define PL_thisopen           (PL_parser->thisopen)
 102 #  define PL_thisstuff          (PL_parser->thisstuff)
 103 #  define PL_thistoken          (PL_parser->thistoken)
 104 #  define PL_thiswhite          (PL_parser->thiswhite)
 105 #  define PL_thiswhite          (PL_parser->thiswhite)
 106 #  define PL_nexttoke           (PL_parser->nexttoke)
 107 #  define PL_curforce           (PL_parser->curforce)
 108 #else
 109 #  define PL_nexttoke           (PL_parser->nexttoke)
 110 #  define PL_nexttype           (PL_parser->nexttype)
 111 #  define PL_nextval            (PL_parser->nextval)
 112 #endif
 113
 114 /* This can't be done with embed.fnc, because struct yy_parser contains a
 115    member named pending_ident, which clashes with the generated #define  */
 116 static int
 117 S_pending_ident(pTHX);
 118
 119 static const char ident_too_long[] = "Identifier too long";
 120
 121 #ifdef PERL_MAD
 122 #  define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
 123 #  define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
 124 #else
 125 #  define CURMAD(slot,sv)
 126 #  define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
 127 #endif
 128
 129 #define XFAKEBRACK 128
 130 #define XENUMMASK 127
 131
 132 #ifdef USE_UTF8_SCRIPTS
 133 #   define UTF (!IN_BYTES)
 134 #else
 135 #   define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
 136 #endif
 137
 138 /* The maximum number of characters preceding the unrecognized one to display */
 139 #define UNRECOGNIZED_PRECEDE_COUNT 10
 140
 141 /* In variables named $^X, these are the legal values for X.
 142  * 1999-02-27 mjd-perl-patch@plover.com */
 143 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
 144
 145 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
 146
 147 /* LEX_* are values for PL_lex_state, the state of the lexer.
 148  * They are arranged oddly so that the guard on the switch statement
 149  * can get by with a single comparison (if the compiler is smart enough).
 150  */
 151
 152 /* #define LEX_NOTPARSING               11 is done in perl.h. */
 153
 154 #define LEX_NORMAL              10 /* normal code (ie not within "...")     */
 155 #define LEX_INTERPNORMAL         9 /* code within a string, eg "$foo[$x+1]" */
 156 #define LEX_INTERPCASEMOD        8 /* expecting a \U, \Q or \E etc          */
 157 #define LEX_INTERPPUSH           7 /* starting a new sublex parse level     */
 158 #define LEX_INTERPSTART          6 /* expecting the start of a $var         */
 159
 160                                    /* at end of code, eg "$x" followed by:  */
 161 #define LEX_INTERPEND            5 /* ... eg not one of [, { or ->          */
 162 #define LEX_INTERPENDMAYBE       4 /* ... eg one of [, { or ->              */
 163
 164 #define LEX_INTERPCONCAT         3 /* expecting anything, eg at start of
 165                                         string or after \E, $foo, etc       */
 166 #define LEX_INTERPCONST          2 /* NOT USED */
 167 #define LEX_FORMLINE             1 /* expecting a format line               */
 168 #define LEX_KNOWNEXT             0 /* next token known; just return it      */
 169
 170
 171 #ifdef DEBUGGING
 172 static const char* const lex_state_names[] = {
 173     "KNOWNEXT",
 174     "FORMLINE",
 175     "INTERPCONST",
 176     "INTERPCONCAT",
 177     "INTERPENDMAYBE",
 178     "INTERPEND",
 179     "INTERPSTART",
 180     "INTERPPUSH",
 181     "INTERPCASEMOD",
 182     "INTERPNORMAL",
 183     "NORMAL"
 184 };
 185 #endif
 186
 187 #ifdef ff_next
 188 #undef ff_next
 189 #endif
 190
 191 #include "keywords.h"
 192
 193 /* CLINE is a macro that ensures PL_copline has a sane value */
 194
 195 #ifdef CLINE
 196 #undef CLINE
 197 #endif
 198 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
 199
 200 #ifdef PERL_MAD
 201 #  define SKIPSPACE0(s) skipspace0(s)
 202 #  define SKIPSPACE1(s) skipspace1(s)
 203 #  define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
 204 #  define PEEKSPACE(s) skipspace2(s,0)
 205 #else
 206 #  define SKIPSPACE0(s) skipspace(s)
 207 #  define SKIPSPACE1(s) skipspace(s)
 208 #  define SKIPSPACE2(s,tsv) skipspace(s)
 209 #  define PEEKSPACE(s) skipspace(s)
 210 #endif
 211
 212 /*
 213  * Convenience functions to return different tokens and prime the
 214  * lexer for the next token.  They all take an argument.
 215  *
 216  * TOKEN        : generic token (used for '(', DOLSHARP, etc)
 217  * OPERATOR     : generic operator
 218  * AOPERATOR    : assignment operator
 219  * PREBLOCK     : beginning the block after an if, while, foreach, ...
 220  * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
 221  * PREREF       : *EXPR where EXPR is not a simple identifier
 222  * TERM         : expression term
 223  * LOOPX        : loop exiting command (goto, last, dump, etc)
 224  * FTST         : file test operator
 225  * FUN0         : zero-argument function
 226  * FUN1         : not used, except for not, which isn't a UNIOP
 227  * BOop         : bitwise or or xor
 228  * BAop         : bitwise and
 229  * SHop         : shift operator
 230  * PWop         : power operator
 231  * PMop         : pattern-matching operator
 232  * Aop          : addition-level operator
 233  * Mop          : multiplication-level operator
 234  * Eop          : equality-testing operator
 235  * Rop          : relational operator <= != gt
 236  *
 237  * Also see LOP and lop() below.
 238  */
 239
 240 #ifdef DEBUGGING /* Serve -DT. */
 241 #   define REPORT(retval) tokereport((I32)retval, &pl_yylval)
 242 #else
 243 #   define REPORT(retval) (retval)
 244 #endif
 245
 246 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
 247 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
 248 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
 249 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
 250 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
 251 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
 252 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
 253 #define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
 254 #define FTST(f)  return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
 255 #define FUN0(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
 256 #define FUN1(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
 257 #define BOop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
 258 #define BAop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
 259 #define SHop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
 260 #define PWop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
 261 #define PMop(f)  return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
 262 #define Aop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
 263 #define Mop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
 264 #define Eop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
 265 #define Rop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
 266
 267 /* This bit of chicanery makes a unary function followed by
 268  * a parenthesis into a function with one argument, highest precedence.
 269  * The UNIDOR macro is for unary functions that can be followed by the //
 270  * operator (such as C<shift // 0>).
 271  */
 272 #define UNI2(f,x) { \
 273         pl_yylval.ival = f; \
 274         PL_expect = x; \
 275         PL_bufptr = s; \
 276         PL_last_uni = PL_oldbufptr; \
 277         PL_last_lop_op = f; \
 278         if (*s == '(') \
 279             return REPORT( (int)FUNC1 ); \
 280         s = PEEKSPACE(s); \
 281         return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
 282         }
 283 #define UNI(f)    UNI2(f,XTERM)
 284 #define UNIDOR(f) UNI2(f,XTERMORDORDOR)
 285
 286 #define UNIBRACK(f) { \
 287         pl_yylval.ival = f; \
 288         PL_bufptr = s; \
 289         PL_last_uni = PL_oldbufptr; \
 290         if (*s == '(') \
 291             return REPORT( (int)FUNC1 ); \
 292         s = PEEKSPACE(s); \
 293         return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
 294         }
 295
 296 /* grandfather return to old style */
 297 #define OLDLOP(f) return(pl_yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
 298
 299 #ifdef DEBUGGING
 300
 301 /* how to interpret the pl_yylval associated with the token */
 302 enum token_type {
 303     TOKENTYPE_NONE,
 304     TOKENTYPE_IVAL,
 305     TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
 306     TOKENTYPE_PVAL,
 307     TOKENTYPE_OPVAL,
 308     TOKENTYPE_GVVAL
 309 };
 310
 311 static struct debug_tokens {
 312     const int token;
 313     enum token_type type;
 314     const char *name;
 315 } const debug_tokens[] =
 316 {
 317     { ADDOP,            TOKENTYPE_OPNUM,        "ADDOP" },
 318     { ANDAND,           TOKENTYPE_NONE,         "ANDAND" },
 319     { ANDOP,            TOKENTYPE_NONE,         "ANDOP" },
 320     { ANONSUB,          TOKENTYPE_IVAL,         "ANONSUB" },
 321     { ARROW,            TOKENTYPE_NONE,         "ARROW" },
 322     { ASSIGNOP,         TOKENTYPE_OPNUM,        "ASSIGNOP" },
 323     { BITANDOP,         TOKENTYPE_OPNUM,        "BITANDOP" },
 324     { BITOROP,          TOKENTYPE_OPNUM,        "BITOROP" },
 325     { COLONATTR,        TOKENTYPE_NONE,         "COLONATTR" },
 326     { CONTINUE,         TOKENTYPE_NONE,         "CONTINUE" },
 327     { DEFAULT,          TOKENTYPE_NONE,         "DEFAULT" },
 328     { DO,               TOKENTYPE_NONE,         "DO" },
 329     { DOLSHARP,         TOKENTYPE_NONE,         "DOLSHARP" },
 330     { DORDOR,           TOKENTYPE_NONE,         "DORDOR" },
 331     { DOROP,            TOKENTYPE_OPNUM,        "DOROP" },
 332     { DOTDOT,           TOKENTYPE_IVAL,         "DOTDOT" },
 333     { ELSE,             TOKENTYPE_NONE,         "ELSE" },
 334     { ELSIF,            TOKENTYPE_IVAL,         "ELSIF" },
 335     { EQOP,             TOKENTYPE_OPNUM,        "EQOP" },
 336     { FOR,              TOKENTYPE_IVAL,         "FOR" },
 337     { FORMAT,           TOKENTYPE_NONE,         "FORMAT" },
 338     { FUNC,             TOKENTYPE_OPNUM,        "FUNC" },
 339     { FUNC0,            TOKENTYPE_OPNUM,        "FUNC0" },
 340     { FUNC0SUB,         TOKENTYPE_OPVAL,        "FUNC0SUB" },
 341     { FUNC1,            TOKENTYPE_OPNUM,        "FUNC1" },
 342     { FUNCMETH,         TOKENTYPE_OPVAL,        "FUNCMETH" },
 343     { GIVEN,            TOKENTYPE_IVAL,         "GIVEN" },
 344     { HASHBRACK,        TOKENTYPE_NONE,         "HASHBRACK" },
 345     { IF,               TOKENTYPE_IVAL,         "IF" },
 346     { LABEL,            TOKENTYPE_PVAL,         "LABEL" },
 347     { LOCAL,            TOKENTYPE_IVAL,         "LOCAL" },
 348     { LOOPEX,           TOKENTYPE_OPNUM,        "LOOPEX" },
 349     { LSTOP,            TOKENTYPE_OPNUM,        "LSTOP" },
 350     { LSTOPSUB,         TOKENTYPE_OPVAL,        "LSTOPSUB" },
 351     { MATCHOP,          TOKENTYPE_OPNUM,        "MATCHOP" },
 352     { METHOD,           TOKENTYPE_OPVAL,        "METHOD" },
 353     { MULOP,            TOKENTYPE_OPNUM,        "MULOP" },
 354     { MY,               TOKENTYPE_IVAL,         "MY" },
 355     { MYSUB,            TOKENTYPE_NONE,         "MYSUB" },
 356     { NOAMP,            TOKENTYPE_NONE,         "NOAMP" },
 357     { NOTOP,            TOKENTYPE_NONE,         "NOTOP" },
 358     { OROP,             TOKENTYPE_IVAL,         "OROP" },
 359     { OROR,             TOKENTYPE_NONE,         "OROR" },
 360     { PACKAGE,          TOKENTYPE_NONE,         "PACKAGE" },
 361     { PLUGEXPR,         TOKENTYPE_OPVAL,        "PLUGEXPR" },
 362     { PLUGSTMT,         TOKENTYPE_OPVAL,        "PLUGSTMT" },
 363     { PMFUNC,           TOKENTYPE_OPVAL,        "PMFUNC" },
 364     { POSTDEC,          TOKENTYPE_NONE,         "POSTDEC" },
 365     { POSTINC,          TOKENTYPE_NONE,         "POSTINC" },
 366     { POWOP,            TOKENTYPE_OPNUM,        "POWOP" },
 367     { PREDEC,           TOKENTYPE_NONE,         "PREDEC" },
 368     { PREINC,           TOKENTYPE_NONE,         "PREINC" },
 369     { PRIVATEREF,       TOKENTYPE_OPVAL,        "PRIVATEREF" },
 370     { REFGEN,           TOKENTYPE_NONE,         "REFGEN" },
 371     { RELOP,            TOKENTYPE_OPNUM,        "RELOP" },
 372     { SHIFTOP,          TOKENTYPE_OPNUM,        "SHIFTOP" },
 373     { SUB,              TOKENTYPE_NONE,         "SUB" },
 374     { THING,            TOKENTYPE_OPVAL,        "THING" },
 375     { UMINUS,           TOKENTYPE_NONE,         "UMINUS" },
 376     { UNIOP,            TOKENTYPE_OPNUM,        "UNIOP" },
 377     { UNIOPSUB,         TOKENTYPE_OPVAL,        "UNIOPSUB" },
 378     { UNLESS,           TOKENTYPE_IVAL,         "UNLESS" },
 379     { UNTIL,            TOKENTYPE_IVAL,         "UNTIL" },
 380     { USE,              TOKENTYPE_IVAL,         "USE" },
 381     { WHEN,             TOKENTYPE_IVAL,         "WHEN" },
 382     { WHILE,            TOKENTYPE_IVAL,         "WHILE" },
 383     { WORD,             TOKENTYPE_OPVAL,        "WORD" },
 384     { YADAYADA,         TOKENTYPE_IVAL,         "YADAYADA" },
 385     { 0,                TOKENTYPE_NONE,         NULL }
 386 };
 387
 388 /* dump the returned token in rv, plus any optional arg in pl_yylval */
 389
 390 STATIC int
 391 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
 392 {
 393     dVAR;
 394
 395     PERL_ARGS_ASSERT_TOKEREPORT;
 396
 397     if (DEBUG_T_TEST) {
 398         const char *name = NULL;
 399         enum token_type type = TOKENTYPE_NONE;
 400         const struct debug_tokens *p;
 401         SV* const report = newSVpvs("<== ");
 402
 403         for (p = debug_tokens; p->token; p++) {
 404             if (p->token == (int)rv) {
 405                 name = p->name;
 406                 type = p->type;
 407                 break;
 408             }
 409         }
 410         if (name)
 411             Perl_sv_catpv(aTHX_ report, name);
 412         else if ((char)rv > ' ' && (char)rv < '~')
 413             Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
 414         else if (!rv)
 415             sv_catpvs(report, "EOF");
 416         else
 417             Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
 418         switch (type) {
 419         case TOKENTYPE_NONE:
 420         case TOKENTYPE_GVVAL: /* doesn't appear to be used */
 421             break;
 422         case TOKENTYPE_IVAL:
 423             Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
 424             break;
 425         case TOKENTYPE_OPNUM:
 426             Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
 427                                     PL_op_name[lvalp->ival]);
 428             break;
 429         case TOKENTYPE_PVAL:
 430             Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
 431             break;
 432         case TOKENTYPE_OPVAL:
 433             if (lvalp->opval) {
 434                 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
 435                                     PL_op_name[lvalp->opval->op_type]);
 436                 if (lvalp->opval->op_type == OP_CONST) {
 437                     Perl_sv_catpvf(aTHX_ report, " %s",
 438                         SvPEEK(cSVOPx_sv(lvalp->opval)));
 439                 }
 440
 441             }
 442             else
 443                 sv_catpvs(report, "(opval=null)");
 444             break;
 445         }
 446         PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
 447     };
 448     return (int)rv;
 449 }
 450
 451
 452 /* print the buffer with suitable escapes */
 453
 454 STATIC void
 455 S_printbuf(pTHX_ const char *const fmt, const char *const s)
 456 {
 457     SV* const tmp = newSVpvs("");
 458
 459     PERL_ARGS_ASSERT_PRINTBUF;
 460
 461     PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
 462     SvREFCNT_dec(tmp);
 463 }
 464
 465 #endif
 466
 467 static int
 468 S_deprecate_commaless_var_list(pTHX) {
 469     PL_expect = XTERM;
 470     deprecate("comma-less variable list");
 471     return REPORT(','); /* grandfather non-comma-format format */
 472 }
 473
 474 /*
 475  * S_ao
 476  *
 477  * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
 478  * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
 479  */
 480
 481 STATIC int
 482 S_ao(pTHX_ int toketype)
 483 {
 484     dVAR;
 485     if (*PL_bufptr == '=') {
 486         PL_bufptr++;
 487         if (toketype == ANDAND)
 488             pl_yylval.ival = OP_ANDASSIGN;
 489         else if (toketype == OROR)
 490             pl_yylval.ival = OP_ORASSIGN;
 491         else if (toketype == DORDOR)
 492             pl_yylval.ival = OP_DORASSIGN;
 493         toketype = ASSIGNOP;
 494     }
 495     return toketype;
 496 }
 497
 498 /*
 499  * S_no_op
 500  * When Perl expects an operator and finds something else, no_op
 501  * prints the warning.  It always prints "<something> found where
 502  * operator expected.  It prints "Missing semicolon on previous line?"
 503  * if the surprise occurs at the start of the line.  "do you need to
 504  * predeclare ..." is printed out for code like "sub bar; foo bar $x"
 505  * where the compiler doesn't know if foo is a method call or a function.
 506  * It prints "Missing operator before end of line" if there's nothing
 507  * after the missing operator, or "... before <...>" if there is something
 508  * after the missing operator.
 509  */
 510
 511 STATIC void
 512 S_no_op(pTHX_ const char *const what, char *s)
 513 {
 514     dVAR;
 515     char * const oldbp = PL_bufptr;
 516     const bool is_first = (PL_oldbufptr == PL_linestart);
 517
 518     PERL_ARGS_ASSERT_NO_OP;
 519
 520     if (!s)
 521         s = oldbp;
 522     else
 523         PL_bufptr = s;
 524     yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
 525     if (ckWARN_d(WARN_SYNTAX)) {
 526         if (is_first)
 527             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 528                     "\t(Missing semicolon on previous line?)\n");
 529         else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
 530             const char *t;
 531             for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
 532                 NOOP;
 533             if (t < PL_bufptr && isSPACE(*t))
 534                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 535                         "\t(Do you need to predeclare %.*s?)\n",
 536                     (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
 537         }
 538         else {
 539             assert(s >= oldbp);
 540             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 541                     "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
 542         }
 543     }
 544     PL_bufptr = oldbp;
 545 }
 546
 547 /*
 548  * S_missingterm
 549  * Complain about missing quote/regexp/heredoc terminator.
 550  * If it's called with NULL then it cauterizes the line buffer.
 551  * If we're in a delimited string and the delimiter is a control
 552  * character, it's reformatted into a two-char sequence like ^C.
 553  * This is fatal.
 554  */
 555
 556 STATIC void
 557 S_missingterm(pTHX_ char *s)
 558 {
 559     dVAR;
 560     char tmpbuf[3];
 561     char q;
 562     if (s) {
 563         char * const nl = strrchr(s,'\n');
 564         if (nl)
 565             *nl = '\0';
 566     }
 567     else if (isCNTRL(PL_multi_close)) {
 568         *tmpbuf = '^';
 569         tmpbuf[1] = (char)toCTRL(PL_multi_close);
 570         tmpbuf[2] = '\0';
 571         s = tmpbuf;
 572     }
 573     else {
 574         *tmpbuf = (char)PL_multi_close;
 575         tmpbuf[1] = '\0';
 576         s = tmpbuf;
 577     }
 578     q = strchr(s,'"') ? '\'' : '"';
 579     Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
 580 }
 581
 582 #define FEATURE_IS_ENABLED(name)                                        \
 583         ((0 != (PL_hints & HINT_LOCALIZE_HH))                           \
 584             && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
 585 /* The longest string we pass in.  */
 586 #define MAX_FEATURE_LEN (sizeof("switch")-1)
 587
 588 /*
 589  * S_feature_is_enabled
 590  * Check whether the named feature is enabled.
 591  */
 592 STATIC bool
 593 S_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
 594 {
 595     dVAR;
 596     HV * const hinthv = GvHV(PL_hintgv);
 597     char he_name[8 + MAX_FEATURE_LEN] = "feature_";
 598
 599     PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
 600
 601     assert(namelen <= MAX_FEATURE_LEN);
 602     memcpy(&he_name[8], name, namelen);
 603
 604     return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
 605 }
 606
 607 /*
 608  * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
 609  * utf16-to-utf8-reversed.
 610  */
 611
 612 #ifdef PERL_CR_FILTER
 613 static void
 614 strip_return(SV *sv)
 615 {
 616     register const char *s = SvPVX_const(sv);
 617     register const char * const e = s + SvCUR(sv);
 618
 619     PERL_ARGS_ASSERT_STRIP_RETURN;
 620
 621     /* outer loop optimized to do nothing if there are no CR-LFs */
 622     while (s < e) {
 623         if (*s++ == '\r' && *s == '\n') {
 624             /* hit a CR-LF, need to copy the rest */
 625             register char *d = s - 1;
 626             *d++ = *s++;
 627             while (s < e) {
 628                 if (*s == '\r' && s[1] == '\n')
 629                     s++;
 630                 *d++ = *s++;
 631             }
 632             SvCUR(sv) -= s - d;
 633             return;
 634         }
 635     }
 636 }
 637
 638 STATIC I32
 639 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
 640 {
 641     const I32 count = FILTER_READ(idx+1, sv, maxlen);
 642     if (count > 0 && !maxlen)
 643         strip_return(sv);
 644     return count;
 645 }
 646 #endif
 647
 648
 649
 650 /*
 651  * Perl_lex_start
 652  *
 653  * Create a parser object and initialise its parser and lexer fields
 654  *
 655  * rsfp       is the opened file handle to read from (if any),
 656  *
 657  * line       holds any initial content already read from the file (or in
 658  *            the case of no file, such as an eval, the whole contents);
 659  *
 660  * new_filter indicates that this is a new file and it shouldn't inherit
 661  *            the filters from the current parser (ie require).
 662  */
 663
 664 void
 665 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, bool new_filter)
 666 {
 667     dVAR;
 668     const char *s = NULL;
 669     STRLEN len;
 670     yy_parser *parser, *oparser;
 671
 672     /* create and initialise a parser */
 673
 674     Newxz(parser, 1, yy_parser);
 675     parser->old_parser = oparser = PL_parser;
 676     PL_parser = parser;
 677
 678     Newx(parser->stack, YYINITDEPTH, yy_stack_frame);
 679     parser->ps = parser->stack;
 680     parser->stack_size = YYINITDEPTH;
 681
 682     parser->stack->state = 0;
 683     parser->yyerrstatus = 0;
 684     parser->yychar = YYEMPTY;           /* Cause a token to be read.  */
 685
 686     /* on scope exit, free this parser and restore any outer one */
 687     SAVEPARSER(parser);
 688     parser->saved_curcop = PL_curcop;
 689
 690     /* initialise lexer state */
 691
 692 #ifdef PERL_MAD
 693     parser->curforce = -1;
 694 #else
 695     parser->nexttoke = 0;
 696 #endif
 697     parser->error_count = oparser ? oparser->error_count : 0;
 698     parser->copline = NOLINE;
 699     parser->lex_state = LEX_NORMAL;
 700     parser->expect = XSTATE;
 701     parser->rsfp = rsfp;
 702     parser->rsfp_filters = (new_filter || !oparser) ? newAV()
 703                 : MUTABLE_AV(SvREFCNT_inc(oparser->rsfp_filters));
 704
 705     Newx(parser->lex_brackstack, 120, char);
 706     Newx(parser->lex_casestack, 12, char);
 707     *parser->lex_casestack = '\0';
 708
 709     if (line) {
 710         s = SvPV_const(line, len);
 711     } else {
 712         len = 0;
 713     }
 714
 715     if (!len) {
 716         parser->linestr = newSVpvs("\n;");
 717     } else if (SvREADONLY(line) || s[len-1] != ';') {
 718         parser->linestr = newSVsv(line);
 719         if (s[len-1] != ';')
 720             sv_catpvs(parser->linestr, "\n;");
 721     } else {
 722         SvTEMP_off(line);
 723         SvREFCNT_inc_simple_void_NN(line);
 724         parser->linestr = line;
 725     }
 726     parser->oldoldbufptr =
 727         parser->oldbufptr =
 728         parser->bufptr =
 729         parser->linestart = SvPVX(parser->linestr);
 730     parser->bufend = parser->bufptr + SvCUR(parser->linestr);
 731     parser->last_lop = parser->last_uni = NULL;
 732 }
 733
 734
 735 /* delete a parser object */
 736
 737 void
 738 Perl_parser_free(pTHX_  const yy_parser *parser)
 739 {
 740     PERL_ARGS_ASSERT_PARSER_FREE;
 741
 742     PL_curcop = parser->saved_curcop;
 743     SvREFCNT_dec(parser->linestr);
 744
 745     if (parser->rsfp == PerlIO_stdin())
 746         PerlIO_clearerr(parser->rsfp);
 747     else if (parser->rsfp && (!parser->old_parser ||
 748                 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
 749         PerlIO_close(parser->rsfp);
 750     SvREFCNT_dec(parser->rsfp_filters);
 751
 752     Safefree(parser->stack);
 753     Safefree(parser->lex_brackstack);
 754     Safefree(parser->lex_casestack);
 755     PL_parser = parser->old_parser;
 756     Safefree(parser);
 757 }
 758
 759
 760 /*
 761  * Perl_lex_end
 762  * Finalizer for lexing operations.  Must be called when the parser is
 763  * done with the lexer.
 764  */
 765
 766 void
 767 Perl_lex_end(pTHX)
 768 {
 769     dVAR;
 770     PL_doextract = FALSE;
 771 }
 772
 773 /*
 774 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
 775
 776 Buffer scalar containing the chunk currently under consideration of the
 777 text currently being lexed.  This is always a plain string scalar (for
 778 which C<SvPOK> is true).  It is not intended to be used as a scalar by
 779 normal scalar means; instead refer to the buffer directly by the pointer
 780 variables described below.
 781
 782 The lexer maintains various C<char*> pointers to things in the
 783 C<PL_parser-E<gt>linestr> buffer.  If C<PL_parser-E<gt>linestr> is ever
 784 reallocated, all of these pointers must be updated.  Don't attempt to
 785 do this manually, but rather use L</lex_grow_linestr> if you need to
 786 reallocate the buffer.
 787
 788 The content of the text chunk in the buffer is commonly exactly one
 789 complete line of input, up to and including a newline terminator,
 790 but there are situations where it is otherwise.  The octets of the
 791 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
 792 The function L</lex_bufutf8> tells you which.  Do not use the C<SvUTF8>
 793 flag on this scalar, which may disagree with it.
 794
 795 For direct examination of the buffer, the variable
 796 L</PL_parser-E<gt>bufend> points to the end of the buffer.  The current
 797 lexing position is pointed to by L</PL_parser-E<gt>bufptr>.  Direct use
 798 of these pointers is usually preferable to examination of the scalar
 799 through normal scalar means.
 800
 801 =for apidoc AmxU|char *|PL_parser-E<gt>bufend
 802
 803 Direct pointer to the end of the chunk of text currently being lexed, the
 804 end of the lexer buffer.  This is equal to C<SvPVX(PL_parser-E<gt>linestr)
 805 + SvCUR(PL_parser-E<gt>linestr)>.  A NUL character (zero octet) is
 806 always located at the end of the buffer, and does not count as part of
 807 the buffer's contents.
 808
 809 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr
 810
 811 Points to the current position of lexing inside the lexer buffer.
 812 Characters around this point may be freely examined, within
 813 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
 814 L</PL_parser-E<gt>bufend>.  The octets of the buffer may be intended to be
 815 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
 816
 817 Lexing code (whether in the Perl core or not) moves this pointer past
 818 the characters that it consumes.  It is also expected to perform some
 819 bookkeeping whenever a newline character is consumed.  This movement
 820 can be more conveniently performed by the function L</lex_read_to>,
 821 which handles newlines appropriately.
 822
 823 Interpretation of the buffer's octets can be abstracted out by
 824 using the slightly higher-level functions L</lex_peek_unichar> and
 825 L</lex_read_unichar>.
 826
 827 =for apidoc AmxU|char *|PL_parser-E<gt>linestart
 828
 829 Points to the start of the current line inside the lexer buffer.
 830 This is useful for indicating at which column an error occurred, and
 831 not much else.  This must be updated by any lexing code that consumes
 832 a newline; the function L</lex_read_to> handles this detail.
 833
 834 =cut
 835 */
 836
 837 /*
 838 =for apidoc Amx|bool|lex_bufutf8
 839
 840 Indicates whether the octets in the lexer buffer
 841 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
 842 of Unicode characters.  If not, they should be interpreted as Latin-1
 843 characters.  This is analogous to the C<SvUTF8> flag for scalars.
 844
 845 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
 846 contains valid UTF-8.  Lexing code must be robust in the face of invalid
 847 encoding.
 848
 849 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
 850 is significant, but not the whole story regarding the input character
 851 encoding.  Normally, when a file is being read, the scalar contains octets
 852 and its C<SvUTF8> flag is off, but the octets should be interpreted as
 853 UTF-8 if the C<use utf8> pragma is in effect.  During a string eval,
 854 however, the scalar may have the C<SvUTF8> flag on, and in this case its
 855 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
 856 is in effect.  This logic may change in the future; use this function
 857 instead of implementing the logic yourself.
 858
 859 =cut
 860 */
 861
 862 bool
 863 Perl_lex_bufutf8(pTHX)
 864 {
 865     return UTF;
 866 }
 867
 868 /*
 869 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
 870
 871 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
 872 at least I<len> octets (including terminating NUL).  Returns a
 873 pointer to the reallocated buffer.  This is necessary before making
 874 any direct modification of the buffer that would increase its length.
 875 L</lex_stuff_pvn> provides a more convenient way to insert text into
 876 the buffer.
 877
 878 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
 879 this function updates all of the lexer's variables that point directly
 880 into the buffer.
 881
 882 =cut
 883 */
 884
 885 char *
 886 Perl_lex_grow_linestr(pTHX_ STRLEN len)
 887 {
 888     SV *linestr;
 889     char *buf;
 890     STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
 891     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
 892     linestr = PL_parser->linestr;
 893     buf = SvPVX(linestr);
 894     if (len <= SvLEN(linestr))
 895         return buf;
 896     bufend_pos = PL_parser->bufend - buf;
 897     bufptr_pos = PL_parser->bufptr - buf;
 898     oldbufptr_pos = PL_parser->oldbufptr - buf;
 899     oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
 900     linestart_pos = PL_parser->linestart - buf;
 901     last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
 902     last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
 903     buf = sv_grow(linestr, len);
 904     PL_parser->bufend = buf + bufend_pos;
 905     PL_parser->bufptr = buf + bufptr_pos;
 906     PL_parser->oldbufptr = buf + oldbufptr_pos;
 907     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
 908     PL_parser->linestart = buf + linestart_pos;
 909     if (PL_parser->last_uni)
 910         PL_parser->last_uni = buf + last_uni_pos;
 911     if (PL_parser->last_lop)
 912         PL_parser->last_lop = buf + last_lop_pos;
 913     return buf;
 914 }
 915
 916 /*
 917 =for apidoc Amx|void|lex_stuff_pvn|char *pv|STRLEN len|U32 flags
 918
 919 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
 920 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
 921 reallocating the buffer if necessary.  This means that lexing code that
 922 runs later will see the characters as if they had appeared in the input.
 923 It is not recommended to do this as part of normal parsing, and most
 924 uses of this facility run the risk of the inserted characters being
 925 interpreted in an unintended manner.
 926
 927 The string to be inserted is represented by I<len> octets starting
 928 at I<pv>.  These octets are interpreted as either UTF-8 or Latin-1,
 929 according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
 930 The characters are recoded for the lexer buffer, according to how the
 931 buffer is currently being interpreted (L</lex_bufutf8>).  If a string
 932 to be interpreted is available as a Perl scalar, the L</lex_stuff_sv>
 933 function is more convenient.
 934
 935 =cut
 936 */
 937
 938 void
 939 Perl_lex_stuff_pvn(pTHX_ char *pv, STRLEN len, U32 flags)
 940 {
 941     char *bufptr;
 942     PERL_ARGS_ASSERT_LEX_STUFF_PVN;
 943     if (flags & ~(LEX_STUFF_UTF8))
 944         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
 945     if (UTF) {
 946         if (flags & LEX_STUFF_UTF8) {
 947             goto plain_copy;
 948         } else {
 949             STRLEN highhalf = 0;
 950             char *p, *e = pv+len;
 951             for (p = pv; p != e; p++)
 952                 highhalf += !!(((U8)*p) & 0x80);
 953             if (!highhalf)
 954                 goto plain_copy;
 955             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
 956             bufptr = PL_parser->bufptr;
 957             Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
 958             PL_parser->bufend += len+highhalf;
 959             for (p = pv; p != e; p++) {
 960                 U8 c = (U8)*p;
 961                 if (c & 0x80) {
 962                     *bufptr++ = (char)(0xc0 | (c >> 6));
 963                     *bufptr++ = (char)(0x80 | (c & 0x3f));
 964                 } else {
 965                     *bufptr++ = (char)c;
 966                 }
 967             }
 968         }
 969     } else {
 970         if (flags & LEX_STUFF_UTF8) {
 971             STRLEN highhalf = 0;
 972             char *p, *e = pv+len;
 973             for (p = pv; p != e; p++) {
 974                 U8 c = (U8)*p;
 975                 if (c >= 0xc4) {
 976                     Perl_croak(aTHX_ "Lexing code attempted to stuff "
 977                                 "non-Latin-1 character into Latin-1 input");
 978                 } else if (c >= 0xc2 && p+1 != e &&
 979                             (((U8)p[1]) & 0xc0) == 0x80) {
 980                     p++;
 981                     highhalf++;
 982                 } else if (c >= 0x80) {
 983                     /* malformed UTF-8 */
 984                     ENTER;
 985                     SAVESPTR(PL_warnhook);
 986                     PL_warnhook = PERL_WARNHOOK_FATAL;
 987                     utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
 988                     LEAVE;
 989                 }
 990             }
 991             if (!highhalf)
 992                 goto plain_copy;
 993             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
 994             bufptr = PL_parser->bufptr;
 995             Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
 996             PL_parser->bufend += len-highhalf;
 997             for (p = pv; p != e; p++) {
 998                 U8 c = (U8)*p;
 999                 if (c & 0x80) {
1000                     *bufptr++ = (char)(((c & 0x3) << 6) | (p[1] & 0x3f));
1001                     p++;
1002                 } else {
1003                     *bufptr++ = (char)c;
1004                 }
1005             }
1006         } else {
1007             plain_copy:
1008             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1009             bufptr = PL_parser->bufptr;
1010             Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1011             PL_parser->bufend += len;
1012             Copy(pv, bufptr, len, char);
1013         }
1014     }
1015 }
1016
1017 /*
1018 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1019
1020 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1021 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1022 reallocating the buffer if necessary.  This means that lexing code that
1023 runs later will see the characters as if they had appeared in the input.
1024 It is not recommended to do this as part of normal parsing, and most
1025 uses of this facility run the risk of the inserted characters being
1026 interpreted in an unintended manner.
1027
1028 The string to be inserted is the string value of I<sv>.  The characters
1029 are recoded for the lexer buffer, according to how the buffer is currently
1030 being interpreted (L</lex_bufutf8>).  If a string to be interpreted is
1031 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1032 need to construct a scalar.
1033
1034 =cut
1035 */
1036
1037 void
1038 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1039 {
1040     char *pv;
1041     STRLEN len;
1042     PERL_ARGS_ASSERT_LEX_STUFF_SV;
1043     if (flags)
1044         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1045     pv = SvPV(sv, len);
1046     lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1047 }
1048
1049 /*
1050 =for apidoc Amx|void|lex_unstuff|char *ptr
1051
1052 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1053 I<ptr>.  Text following I<ptr> will be moved, and the buffer shortened.
1054 This hides the discarded text from any lexing code that runs later,
1055 as if the text had never appeared.
1056
1057 This is not the normal way to consume lexed text.  For that, use
1058 L</lex_read_to>.
1059
1060 =cut
1061 */
1062
1063 void
1064 Perl_lex_unstuff(pTHX_ char *ptr)
1065 {
1066     char *buf, *bufend;
1067     STRLEN unstuff_len;
1068     PERL_ARGS_ASSERT_LEX_UNSTUFF;
1069     buf = PL_parser->bufptr;
1070     if (ptr < buf)
1071         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1072     if (ptr == buf)
1073         return;
1074     bufend = PL_parser->bufend;
1075     if (ptr > bufend)
1076         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1077     unstuff_len = ptr - buf;
1078     Move(ptr, buf, bufend+1-ptr, char);
1079     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1080     PL_parser->bufend = bufend - unstuff_len;
1081 }
1082
1083 /*
1084 =for apidoc Amx|void|lex_read_to|char *ptr
1085
1086 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1087 to I<ptr>.  This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1088 performing the correct bookkeeping whenever a newline character is passed.
1089 This is the normal way to consume lexed text.
1090
1091 Interpretation of the buffer's octets can be abstracted out by
1092 using the slightly higher-level functions L</lex_peek_unichar> and
1093 L</lex_read_unichar>.
1094
1095 =cut
1096 */
1097
1098 void
1099 Perl_lex_read_to(pTHX_ char *ptr)
1100 {
1101     char *s;
1102     PERL_ARGS_ASSERT_LEX_READ_TO;
1103     s = PL_parser->bufptr;
1104     if (ptr < s || ptr > PL_parser->bufend)
1105         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1106     for (; s != ptr; s++)
1107         if (*s == '\n') {
1108             CopLINE_inc(PL_curcop);
1109             PL_parser->linestart = s+1;
1110         }
1111     PL_parser->bufptr = ptr;
1112 }
1113
1114 /*
1115 =for apidoc Amx|void|lex_discard_to|char *ptr
1116
1117 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1118 up to I<ptr>.  The remaining content of the buffer will be moved, and
1119 all pointers into the buffer updated appropriately.  I<ptr> must not
1120 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1121 it is not permitted to discard text that has yet to be lexed.
1122
1123 Normally it is not necessarily to do this directly, because it suffices to
1124 use the implicit discarding behaviour of L</lex_next_chunk> and things
1125 based on it.  However, if a token stretches across multiple lines,
1126 and the lexing code has kept multiple lines of text in the buffer fof
1127 that purpose, then after completion of the token it would be wise to
1128 explicitly discard the now-unneeded earlier lines, to avoid future
1129 multi-line tokens growing the buffer without bound.
1130
1131 =cut
1132 */
1133
1134 void
1135 Perl_lex_discard_to(pTHX_ char *ptr)
1136 {
1137     char *buf;
1138     STRLEN discard_len;
1139     PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1140     buf = SvPVX(PL_parser->linestr);
1141     if (ptr < buf)
1142         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1143     if (ptr == buf)
1144         return;
1145     if (ptr > PL_parser->bufptr)
1146         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1147     discard_len = ptr - buf;
1148     if (PL_parser->oldbufptr < ptr)
1149         PL_parser->oldbufptr = ptr;
1150     if (PL_parser->oldoldbufptr < ptr)
1151         PL_parser->oldoldbufptr = ptr;
1152     if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1153         PL_parser->last_uni = NULL;
1154     if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1155         PL_parser->last_lop = NULL;
1156     Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1157     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1158     PL_parser->bufend -= discard_len;
1159     PL_parser->bufptr -= discard_len;
1160     PL_parser->oldbufptr -= discard_len;
1161     PL_parser->oldoldbufptr -= discard_len;
1162     if (PL_parser->last_uni)
1163         PL_parser->last_uni -= discard_len;
1164     if (PL_parser->last_lop)
1165         PL_parser->last_lop -= discard_len;
1166 }
1167
1168 /*
1169 =for apidoc Amx|bool|lex_next_chunk|U32 flags
1170
1171 Reads in the next chunk of text to be lexed, appending it to
1172 L</PL_parser-E<gt>linestr>.  This should be called when lexing code has
1173 looked to the end of the current chunk and wants to know more.  It is
1174 usual, but not necessary, for lexing to have consumed the entirety of
1175 the current chunk at this time.
1176
1177 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1178 chunk (i.e., the current chunk has been entirely consumed), normally the
1179 current chunk will be discarded at the same time that the new chunk is
1180 read in.  If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1181 will not be discarded.  If the current chunk has not been entirely
1182 consumed, then it will not be discarded regardless of the flag.
1183
1184 Returns true if some new text was added to the buffer, or false if the
1185 buffer has reached the end of the input text.
1186
1187 =cut
1188 */
1189
1190 #define LEX_FAKE_EOF 0x80000000
1191
1192 bool
1193 Perl_lex_next_chunk(pTHX_ U32 flags)
1194 {
1195     SV *linestr;
1196     char *buf;
1197     STRLEN old_bufend_pos, new_bufend_pos;
1198     STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1199     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1200     bool got_some_for_debugger = 0;
1201     bool got_some;
1202     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF))
1203         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1204     linestr = PL_parser->linestr;
1205     buf = SvPVX(linestr);
1206     if (!(flags & LEX_KEEP_PREVIOUS) &&
1207             PL_parser->bufptr == PL_parser->bufend) {
1208         old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1209         linestart_pos = 0;
1210         if (PL_parser->last_uni != PL_parser->bufend)
1211             PL_parser->last_uni = NULL;
1212         if (PL_parser->last_lop != PL_parser->bufend)
1213             PL_parser->last_lop = NULL;
1214         last_uni_pos = last_lop_pos = 0;
1215         *buf = 0;
1216         SvCUR(linestr) = 0;
1217     } else {
1218         old_bufend_pos = PL_parser->bufend - buf;
1219         bufptr_pos = PL_parser->bufptr - buf;
1220         oldbufptr_pos = PL_parser->oldbufptr - buf;
1221         oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1222         linestart_pos = PL_parser->linestart - buf;
1223         last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1224         last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1225     }
1226     if (flags & LEX_FAKE_EOF) {
1227         goto eof;
1228     } else if (!PL_parser->rsfp) {
1229         got_some = 0;
1230     } else if (filter_gets(linestr, old_bufend_pos)) {
1231         got_some = 1;
1232         got_some_for_debugger = 1;
1233     } else {
1234         if (!SvPOK(linestr))   /* can get undefined by filter_gets */
1235             sv_setpvs(linestr, "");
1236         eof:
1237         /* End of real input.  Close filehandle (unless it was STDIN),
1238          * then add implicit termination.
1239          */
1240         if ((PerlIO*)PL_parser->rsfp == PerlIO_stdin())
1241             PerlIO_clearerr(PL_parser->rsfp);
1242         else if (PL_parser->rsfp)
1243             (void)PerlIO_close(PL_parser->rsfp);
1244         PL_parser->rsfp = NULL;
1245         PL_doextract = FALSE;
1246 #ifdef PERL_MAD
1247         if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1248             PL_faketokens = 1;
1249 #endif
1250         if (!PL_in_eval && PL_minus_p) {
1251             sv_catpvs(linestr,
1252                 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1253             PL_minus_n = PL_minus_p = 0;
1254         } else if (!PL_in_eval && PL_minus_n) {
1255             sv_catpvs(linestr, /*{*/";}");
1256             PL_minus_n = 0;
1257         } else
1258             sv_catpvs(linestr, ";");
1259         got_some = 1;
1260     }
1261     buf = SvPVX(linestr);
1262     new_bufend_pos = SvCUR(linestr);
1263     PL_parser->bufend = buf + new_bufend_pos;
1264     PL_parser->bufptr = buf + bufptr_pos;
1265     PL_parser->oldbufptr = buf + oldbufptr_pos;
1266     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1267     PL_parser->linestart = buf + linestart_pos;
1268     if (PL_parser->last_uni)
1269         PL_parser->last_uni = buf + last_uni_pos;
1270     if (PL_parser->last_lop)
1271         PL_parser->last_lop = buf + last_lop_pos;
1272     if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
1273             PL_curstash != PL_debstash) {
1274         /* debugger active and we're not compiling the debugger code,
1275          * so store the line into the debugger's array of lines
1276          */
1277         update_debugger_info(NULL, buf+old_bufend_pos,
1278             new_bufend_pos-old_bufend_pos);
1279     }
1280     return got_some;
1281 }
1282
1283 /*
1284 =for apidoc Amx|I32|lex_peek_unichar|U32 flags
1285
1286 Looks ahead one (Unicode) character in the text currently being lexed.
1287 Returns the codepoint (unsigned integer value) of the next character,
1288 or -1 if lexing has reached the end of the input text.  To consume the
1289 peeked character, use L</lex_read_unichar>.
1290
1291 If the next character is in (or extends into) the next chunk of input
1292 text, the next chunk will be read in.  Normally the current chunk will be
1293 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1294 then the current chunk will not be discarded.
1295
1296 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1297 is encountered, an exception is generated.
1298
1299 =cut
1300 */
1301
1302 I32
1303 Perl_lex_peek_unichar(pTHX_ U32 flags)
1304 {
1305     char *s, *bufend;
1306     if (flags & ~(LEX_KEEP_PREVIOUS))
1307         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1308     s = PL_parser->bufptr;
1309     bufend = PL_parser->bufend;
1310     if (UTF) {
1311         U8 head;
1312         I32 unichar;
1313         STRLEN len, retlen;
1314         if (s == bufend) {
1315             if (!lex_next_chunk(flags))
1316                 return -1;
1317             s = PL_parser->bufptr;
1318             bufend = PL_parser->bufend;
1319         }
1320         head = (U8)*s;
1321         if (!(head & 0x80))
1322             return head;
1323         if (head & 0x40) {
1324             len = PL_utf8skip[head];
1325             while ((STRLEN)(bufend-s) < len) {
1326                 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1327                     break;
1328                 s = PL_parser->bufptr;
1329                 bufend = PL_parser->bufend;
1330             }
1331         }
1332         unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1333         if (retlen == (STRLEN)-1) {
1334             /* malformed UTF-8 */
1335             ENTER;
1336             SAVESPTR(PL_warnhook);
1337             PL_warnhook = PERL_WARNHOOK_FATAL;
1338             utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1339             LEAVE;
1340         }
1341         return unichar;
1342     } else {
1343         if (s == bufend) {
1344             if (!lex_next_chunk(flags))
1345                 return -1;
1346             s = PL_parser->bufptr;
1347         }
1348         return (U8)*s;
1349     }
1350 }
1351
1352 /*
1353 =for apidoc Amx|I32|lex_read_unichar|U32 flags
1354
1355 Reads the next (Unicode) character in the text currently being lexed.
1356 Returns the codepoint (unsigned integer value) of the character read,
1357 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1358 if lexing has reached the end of the input text.  To non-destructively
1359 examine the next character, use L</lex_peek_unichar> instead.
1360
1361 If the next character is in (or extends into) the next chunk of input
1362 text, the next chunk will be read in.  Normally the current chunk will be
1363 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1364 then the current chunk will not be discarded.
1365
1366 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1367 is encountered, an exception is generated.
1368
1369 =cut
1370 */
1371
1372 I32
1373 Perl_lex_read_unichar(pTHX_ U32 flags)
1374 {
1375     I32 c;
1376     if (flags & ~(LEX_KEEP_PREVIOUS))
1377         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1378     c = lex_peek_unichar(flags);
1379     if (c != -1) {
1380         if (c == '\n')
1381             CopLINE_inc(PL_curcop);
1382         PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1383     }
1384     return c;
1385 }
1386
1387 /*
1388 =for apidoc Amx|void|lex_read_space|U32 flags
1389
1390 Reads optional spaces, in Perl style, in the text currently being
1391 lexed.  The spaces may include ordinary whitespace characters and
1392 Perl-style comments.  C<#line> directives are processed if encountered.
1393 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1394 at a non-space character (or the end of the input text).
1395
1396 If spaces extend into the next chunk of input text, the next chunk will
1397 be read in.  Normally the current chunk will be discarded at the same
1398 time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1399 chunk will not be discarded.
1400
1401 =cut
1402 */
1403
1404 void
1405 Perl_lex_read_space(pTHX_ U32 flags)
1406 {
1407     char *s, *bufend;
1408     bool need_incline = 0;
1409     if (flags & ~(LEX_KEEP_PREVIOUS))
1410         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1411 #ifdef PERL_MAD
1412     if (PL_skipwhite) {
1413         sv_free(PL_skipwhite);
1414         PL_skipwhite = NULL;
1415     }
1416     if (PL_madskills)
1417         PL_skipwhite = newSVpvs("");
1418 #endif /* PERL_MAD */
1419     s = PL_parser->bufptr;
1420     bufend = PL_parser->bufend;
1421     while (1) {
1422         char c = *s;
1423         if (c == '#') {
1424             do {
1425                 c = *++s;
1426             } while (!(c == '\n' || (c == 0 && s == bufend)));
1427         } else if (c == '\n') {
1428             s++;
1429             PL_parser->linestart = s;
1430             if (s == bufend)
1431                 need_incline = 1;
1432             else
1433                 incline(s);
1434         } else if (isSPACE(c)) {
1435             s++;
1436         } else if (c == 0 && s == bufend) {
1437             bool got_more;
1438 #ifdef PERL_MAD
1439             if (PL_madskills)
1440                 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1441 #endif /* PERL_MAD */
1442             PL_parser->bufptr = s;
1443             CopLINE_inc(PL_curcop);
1444             got_more = lex_next_chunk(flags);
1445             CopLINE_dec(PL_curcop);
1446             s = PL_parser->bufptr;
1447             bufend = PL_parser->bufend;
1448             if (!got_more)
1449                 break;
1450             if (need_incline && PL_parser->rsfp) {
1451                 incline(s);
1452                 need_incline = 0;
1453             }
1454         } else {
1455             break;
1456         }
1457     }
1458 #ifdef PERL_MAD
1459     if (PL_madskills)
1460         sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1461 #endif /* PERL_MAD */
1462     PL_parser->bufptr = s;
1463 }
1464
1465 /*
1466  * S_incline
1467  * This subroutine has nothing to do with tilting, whether at windmills
1468  * or pinball tables.  Its name is short for "increment line".  It
1469  * increments the current line number in CopLINE(PL_curcop) and checks
1470  * to see whether the line starts with a comment of the form
1471  *    # line 500 "foo.pm"
1472  * If so, it sets the current line number and file to the values in the comment.
1473  */
1474
1475 STATIC void
1476 S_incline(pTHX_ const char *s)
1477 {
1478     dVAR;
1479     const char *t;
1480     const char *n;
1481     const char *e;
1482
1483     PERL_ARGS_ASSERT_INCLINE;
1484
1485     CopLINE_inc(PL_curcop);
1486     if (*s++ != '#')
1487         return;
1488     while (SPACE_OR_TAB(*s))
1489         s++;
1490     if (strnEQ(s, "line", 4))
1491         s += 4;
1492     else
1493         return;
1494     if (SPACE_OR_TAB(*s))
1495         s++;
1496     else
1497         return;
1498     while (SPACE_OR_TAB(*s))
1499         s++;
1500     if (!isDIGIT(*s))
1501         return;
1502
1503     n = s;
1504     while (isDIGIT(*s))
1505         s++;
1506     if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1507         return;
1508     while (SPACE_OR_TAB(*s))
1509         s++;
1510     if (*s == '"' && (t = strchr(s+1, '"'))) {
1511         s++;
1512         e = t + 1;
1513     }
1514     else {
1515         t = s;
1516         while (!isSPACE(*t))
1517             t++;
1518         e = t;
1519     }
1520     while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1521         e++;
1522     if (*e != '\n' && *e != '\0')
1523         return;         /* false alarm */
1524
1525     if (t - s > 0) {
1526         const STRLEN len = t - s;
1527 #ifndef USE_ITHREADS
1528         SV *const temp_sv = CopFILESV(PL_curcop);
1529         const char *cf;
1530         STRLEN tmplen;
1531
1532         if (temp_sv) {
1533             cf = SvPVX(temp_sv);
1534             tmplen = SvCUR(temp_sv);
1535         } else {
1536             cf = NULL;
1537             tmplen = 0;
1538         }
1539
1540         if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
1541             /* must copy *{"::_<(eval N)[oldfilename:L]"}
1542              * to *{"::_<newfilename"} */
1543             /* However, the long form of evals is only turned on by the
1544                debugger - usually they're "(eval %lu)" */
1545             char smallbuf[128];
1546             char *tmpbuf;
1547             GV **gvp;
1548             STRLEN tmplen2 = len;
1549             if (tmplen + 2 <= sizeof smallbuf)
1550                 tmpbuf = smallbuf;
1551             else
1552                 Newx(tmpbuf, tmplen + 2, char);
1553             tmpbuf[0] = '_';
1554             tmpbuf[1] = '<';
1555             memcpy(tmpbuf + 2, cf, tmplen);
1556             tmplen += 2;
1557             gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
1558             if (gvp) {
1559                 char *tmpbuf2;
1560                 GV *gv2;
1561
1562                 if (tmplen2 + 2 <= sizeof smallbuf)
1563                     tmpbuf2 = smallbuf;
1564                 else
1565                     Newx(tmpbuf2, tmplen2 + 2, char);
1566
1567                 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
1568                     /* Either they malloc'd it, or we malloc'd it,
1569                        so no prefix is present in ours.  */
1570                     tmpbuf2[0] = '_';
1571                     tmpbuf2[1] = '<';
1572                 }
1573
1574                 memcpy(tmpbuf2 + 2, s, tmplen2);
1575                 tmplen2 += 2;
1576
1577                 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1578                 if (!isGV(gv2)) {
1579                     gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1580                     /* adjust ${"::_<newfilename"} to store the new file name */
1581                     GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1582                     GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
1583                     GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
1584                 }
1585
1586                 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1587             }
1588             if (tmpbuf != smallbuf) Safefree(tmpbuf);
1589         }
1590 #endif
1591         CopFILE_free(PL_curcop);
1592         CopFILE_setn(PL_curcop, s, len);
1593     }
1594     CopLINE_set(PL_curcop, atoi(n)-1);
1595 }
1596
1597 #ifdef PERL_MAD
1598 /* skip space before PL_thistoken */
1599
1600 STATIC char *
1601 S_skipspace0(pTHX_ register char *s)
1602 {
1603     PERL_ARGS_ASSERT_SKIPSPACE0;
1604
1605     s = skipspace(s);
1606     if (!PL_madskills)
1607         return s;
1608     if (PL_skipwhite) {
1609         if (!PL_thiswhite)
1610             PL_thiswhite = newSVpvs("");
1611         sv_catsv(PL_thiswhite, PL_skipwhite);
1612         sv_free(PL_skipwhite);
1613         PL_skipwhite = 0;
1614     }
1615     PL_realtokenstart = s - SvPVX(PL_linestr);
1616     return s;
1617 }
1618
1619 /* skip space after PL_thistoken */
1620
1621 STATIC char *
1622 S_skipspace1(pTHX_ register char *s)
1623 {
1624     const char *start = s;
1625     I32 startoff = start - SvPVX(PL_linestr);
1626
1627     PERL_ARGS_ASSERT_SKIPSPACE1;
1628
1629     s = skipspace(s);
1630     if (!PL_madskills)
1631         return s;
1632     start = SvPVX(PL_linestr) + startoff;
1633     if (!PL_thistoken && PL_realtokenstart >= 0) {
1634         const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1635         PL_thistoken = newSVpvn(tstart, start - tstart);
1636     }
1637     PL_realtokenstart = -1;
1638     if (PL_skipwhite) {
1639         if (!PL_nextwhite)
1640             PL_nextwhite = newSVpvs("");
1641         sv_catsv(PL_nextwhite, PL_skipwhite);
1642         sv_free(PL_skipwhite);
1643         PL_skipwhite = 0;
1644     }
1645     return s;
1646 }
1647
1648 STATIC char *
1649 S_skipspace2(pTHX_ register char *s, SV **svp)
1650 {
1651     char *start;
1652     const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
1653     const I32 startoff = s - SvPVX(PL_linestr);
1654
1655     PERL_ARGS_ASSERT_SKIPSPACE2;
1656
1657     s = skipspace(s);
1658     PL_bufptr = SvPVX(PL_linestr) + bufptroff;
1659     if (!PL_madskills || !svp)
1660         return s;
1661     start = SvPVX(PL_linestr) + startoff;
1662     if (!PL_thistoken && PL_realtokenstart >= 0) {
1663         char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1664         PL_thistoken = newSVpvn(tstart, start - tstart);
1665         PL_realtokenstart = -1;
1666     }
1667     if (PL_skipwhite) {
1668         if (!*svp)
1669             *svp = newSVpvs("");
1670         sv_setsv(*svp, PL_skipwhite);
1671         sv_free(PL_skipwhite);
1672         PL_skipwhite = 0;
1673     }
1674
1675     return s;
1676 }
1677 #endif
1678
1679 STATIC void
1680 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1681 {
1682     AV *av = CopFILEAVx(PL_curcop);
1683     if (av) {
1684         SV * const sv = newSV_type(SVt_PVMG);
1685         if (orig_sv)
1686             sv_setsv(sv, orig_sv);
1687         else
1688             sv_setpvn(sv, buf, len);
1689         (void)SvIOK_on(sv);
1690         SvIV_set(sv, 0);
1691         av_store(av, (I32)CopLINE(PL_curcop), sv);
1692     }
1693 }
1694
1695 /*
1696  * S_skipspace
1697  * Called to gobble the appropriate amount and type of whitespace.
1698  * Skips comments as well.
1699  */
1700
1701 STATIC char *
1702 S_skipspace(pTHX_ register char *s)
1703 {
1704 #ifdef PERL_MAD
1705     char *start = s;
1706 #endif /* PERL_MAD */
1707     PERL_ARGS_ASSERT_SKIPSPACE;
1708 #ifdef PERL_MAD
1709     if (PL_skipwhite) {
1710         sv_free(PL_skipwhite);
1711         PL_skipwhite = NULL;
1712     }
1713 #endif /* PERL_MAD */
1714     if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1715         while (s < PL_bufend && SPACE_OR_TAB(*s))
1716             s++;
1717     } else if (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE) {
1718         while (isSPACE(*s) && *s != '\n')
1719             s++;
1720         if (*s == '#') {
1721             do {
1722                 s++;
1723             } while (s != PL_bufend && *s != '\n');
1724         }
1725         if (*s == '\n')
1726             s++;
1727     } else {
1728         STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1729         PL_bufptr = s;
1730         lex_read_space(LEX_KEEP_PREVIOUS);
1731         s = PL_bufptr;
1732         PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1733         if (PL_linestart > PL_bufptr)
1734             PL_bufptr = PL_linestart;
1735         return s;
1736     }
1737 #ifdef PERL_MAD
1738     if (PL_madskills)
1739         PL_skipwhite = newSVpvn(start, s-start);
1740 #endif /* PERL_MAD */
1741     return s;
1742 }
1743
1744 /*
1745  * S_check_uni
1746  * Check the unary operators to ensure there's no ambiguity in how they're
1747  * used.  An ambiguous piece of code would be:
1748  *     rand + 5
1749  * This doesn't mean rand() + 5.  Because rand() is a unary operator,
1750  * the +5 is its argument.
1751  */
1752
1753 STATIC void
1754 S_check_uni(pTHX)
1755 {
1756     dVAR;
1757     const char *s;
1758     const char *t;
1759
1760     if (PL_oldoldbufptr != PL_last_uni)
1761         return;
1762     while (isSPACE(*PL_last_uni))
1763         PL_last_uni++;
1764     s = PL_last_uni;
1765     while (isALNUM_lazy_if(s,UTF) || *s == '-')
1766         s++;
1767     if ((t = strchr(s, '(')) && t < PL_bufptr)
1768         return;
1769
1770     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1771                      "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1772                      (int)(s - PL_last_uni), PL_last_uni);
1773 }
1774
1775 /*
1776  * LOP : macro to build a list operator.  Its behaviour has been replaced
1777  * with a subroutine, S_lop() for which LOP is just another name.
1778  */
1779
1780 #define LOP(f,x) return lop(f,x,s)
1781
1782 /*
1783  * S_lop
1784  * Build a list operator (or something that might be one).  The rules:
1785  *  - if we have a next token, then it's a list operator [why?]
1786  *  - if the next thing is an opening paren, then it's a function
1787  *  - else it's a list operator
1788  */
1789
1790 STATIC I32
1791 S_lop(pTHX_ I32 f, int x, char *s)
1792 {
1793     dVAR;
1794
1795     PERL_ARGS_ASSERT_LOP;
1796
1797     pl_yylval.ival = f;
1798     CLINE;
1799     PL_expect = x;
1800     PL_bufptr = s;
1801     PL_last_lop = PL_oldbufptr;
1802     PL_last_lop_op = (OPCODE)f;
1803 #ifdef PERL_MAD
1804     if (PL_lasttoke)
1805         return REPORT(LSTOP);
1806 #else
1807     if (PL_nexttoke)
1808         return REPORT(LSTOP);
1809 #endif
1810     if (*s == '(')
1811         return REPORT(FUNC);
1812     s = PEEKSPACE(s);
1813     if (*s == '(')
1814         return REPORT(FUNC);
1815     else
1816         return REPORT(LSTOP);
1817 }
1818
1819 #ifdef PERL_MAD
1820  /*
1821  * S_start_force
1822  * Sets up for an eventual force_next().  start_force(0) basically does
1823  * an unshift, while start_force(-1) does a push.  yylex removes items
1824  * on the "pop" end.
1825  */
1826
1827 STATIC void
1828 S_start_force(pTHX_ int where)
1829 {
1830     int i;
1831
1832     if (where < 0)      /* so people can duplicate start_force(PL_curforce) */
1833         where = PL_lasttoke;
1834     assert(PL_curforce < 0 || PL_curforce == where);
1835     if (PL_curforce != where) {
1836         for (i = PL_lasttoke; i > where; --i) {
1837             PL_nexttoke[i] = PL_nexttoke[i-1];
1838         }
1839         PL_lasttoke++;
1840     }
1841     if (PL_curforce < 0)        /* in case of duplicate start_force() */
1842         Zero(&PL_nexttoke[where], 1, NEXTTOKE);
1843     PL_curforce = where;
1844     if (PL_nextwhite) {
1845         if (PL_madskills)
1846             curmad('^', newSVpvs(""));
1847         CURMAD('_', PL_nextwhite);
1848     }
1849 }
1850
1851 STATIC void
1852 S_curmad(pTHX_ char slot, SV *sv)
1853 {
1854     MADPROP **where;
1855
1856     if (!sv)
1857         return;
1858     if (PL_curforce < 0)
1859         where = &PL_thismad;
1860     else
1861         where = &PL_nexttoke[PL_curforce].next_mad;
1862
1863     if (PL_faketokens)
1864         sv_setpvs(sv, "");
1865     else {
1866         if (!IN_BYTES) {
1867             if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1868                 SvUTF8_on(sv);
1869             else if (PL_encoding) {
1870                 sv_recode_to_utf8(sv, PL_encoding);
1871             }
1872         }
1873     }
1874
1875     /* keep a slot open for the head of the list? */
1876     if (slot != '_' && *where && (*where)->mad_key == '^') {
1877         (*where)->mad_key = slot;
1878         sv_free(MUTABLE_SV(((*where)->mad_val)));
1879         (*where)->mad_val = (void*)sv;
1880     }
1881     else
1882         addmad(newMADsv(slot, sv), where, 0);
1883 }
1884 #else
1885 #  define start_force(where)    NOOP
1886 #  define curmad(slot, sv)      NOOP
1887 #endif
1888
1889 /*
1890  * S_force_next
1891  * When the lexer realizes it knows the next token (for instance,
1892  * it is reordering tokens for the parser) then it can call S_force_next
1893  * to know what token to return the next time the lexer is called.  Caller
1894  * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1895  * and possibly PL_expect to ensure the lexer handles the token correctly.
1896  */
1897
1898 STATIC void
1899 S_force_next(pTHX_ I32 type)
1900 {
1901     dVAR;
1902 #ifdef DEBUGGING
1903     if (DEBUG_T_TEST) {
1904         PerlIO_printf(Perl_debug_log, "### forced token:\n");
1905         tokereport(type, &NEXTVAL_NEXTTOKE);
1906     }
1907 #endif
1908 #ifdef PERL_MAD
1909     if (PL_curforce < 0)
1910         start_force(PL_lasttoke);
1911     PL_nexttoke[PL_curforce].next_type = type;
1912     if (PL_lex_state != LEX_KNOWNEXT)
1913         PL_lex_defer = PL_lex_state;
1914     PL_lex_state = LEX_KNOWNEXT;
1915     PL_lex_expect = PL_expect;
1916     PL_curforce = -1;
1917 #else
1918     PL_nexttype[PL_nexttoke] = type;
1919     PL_nexttoke++;
1920     if (PL_lex_state != LEX_KNOWNEXT) {
1921         PL_lex_defer = PL_lex_state;
1922         PL_lex_expect = PL_expect;
1923         PL_lex_state = LEX_KNOWNEXT;
1924     }
1925 #endif
1926 }
1927
1928 STATIC SV *
1929 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
1930 {
1931     dVAR;
1932     SV * const sv = newSVpvn_utf8(start, len,
1933                                   !IN_BYTES
1934                                   && UTF
1935                                   && !is_ascii_string((const U8*)start, len)
1936                                   && is_utf8_string((const U8*)start, len));
1937     return sv;
1938 }
1939
1940 /*
1941  * S_force_word
1942  * When the lexer knows the next thing is a word (for instance, it has
1943  * just seen -> and it knows that the next char is a word char, then
1944  * it calls S_force_word to stick the next word into the PL_nexttoke/val
1945  * lookahead.
1946  *
1947  * Arguments:
1948  *   char *start : buffer position (must be within PL_linestr)
1949  *   int token   : PL_next* will be this type of bare word (e.g., METHOD,WORD)
1950  *   int check_keyword : if true, Perl checks to make sure the word isn't
1951  *       a keyword (do this if the word is a label, e.g. goto FOO)
1952  *   int allow_pack : if true, : characters will also be allowed (require,
1953  *       use, etc. do this)
1954  *   int allow_initial_tick : used by the "sub" lexer only.
1955  */
1956
1957 STATIC char *
1958 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
1959 {
1960     dVAR;
1961     register char *s;
1962     STRLEN len;
1963
1964     PERL_ARGS_ASSERT_FORCE_WORD;
1965
1966     start = SKIPSPACE1(start);
1967     s = start;
1968     if (isIDFIRST_lazy_if(s,UTF) ||
1969         (allow_pack && *s == ':') ||
1970         (allow_initial_tick && *s == '\'') )
1971     {
1972         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
1973         if (check_keyword && keyword(PL_tokenbuf, len, 0))
1974             return start;
1975         start_force(PL_curforce);
1976         if (PL_madskills)
1977             curmad('X', newSVpvn(start,s-start));
1978         if (token == METHOD) {
1979             s = SKIPSPACE1(s);
1980             if (*s == '(')
1981                 PL_expect = XTERM;
1982             else {
1983                 PL_expect = XOPERATOR;
1984             }
1985         }
1986         if (PL_madskills)
1987             curmad('g', newSVpvs( "forced" ));
1988         NEXTVAL_NEXTTOKE.opval
1989             = (OP*)newSVOP(OP_CONST,0,
1990                            S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
1991         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
1992         force_next(token);
1993     }
1994     return s;
1995 }
1996
1997 /*
1998  * S_force_ident
1999  * Called when the lexer wants $foo *foo &foo etc, but the program
2000  * text only contains the "foo" portion.  The first argument is a pointer
2001  * to the "foo", and the second argument is the type symbol to prefix.
2002  * Forces the next token to be a "WORD".
2003  * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2004  */
2005
2006 STATIC void
2007 S_force_ident(pTHX_ register const char *s, int kind)
2008 {
2009     dVAR;
2010
2011     PERL_ARGS_ASSERT_FORCE_IDENT;
2012
2013     if (*s) {
2014         const STRLEN len = strlen(s);
2015         OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
2016         start_force(PL_curforce);
2017         NEXTVAL_NEXTTOKE.opval = o;
2018         force_next(WORD);
2019         if (kind) {
2020             o->op_private = OPpCONST_ENTERED;
2021             /* XXX see note in pp_entereval() for why we forgo typo
2022                warnings if the symbol must be introduced in an eval.
2023                GSAR 96-10-12 */
2024             gv_fetchpvn_flags(s, len,
2025                               PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2026                               : GV_ADD,
2027                               kind == '$' ? SVt_PV :
2028                               kind == '@' ? SVt_PVAV :
2029                               kind == '%' ? SVt_PVHV :
2030                               SVt_PVGV
2031                               );
2032         }
2033     }
2034 }
2035
2036 NV
2037 Perl_str_to_version(pTHX_ SV *sv)
2038 {
2039     NV retval = 0.0;
2040     NV nshift = 1.0;
2041     STRLEN len;
2042     const char *start = SvPV_const(sv,len);
2043     const char * const end = start + len;
2044     const bool utf = SvUTF8(sv) ? TRUE : FALSE;
2045
2046     PERL_ARGS_ASSERT_STR_TO_VERSION;
2047
2048     while (start < end) {
2049         STRLEN skip;
2050         UV n;
2051         if (utf)
2052             n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2053         else {
2054             n = *(U8*)start;
2055             skip = 1;
2056         }
2057         retval += ((NV)n)/nshift;
2058         start += skip;
2059         nshift *= 1000;
2060     }
2061     return retval;
2062 }
2063
2064 /*
2065  * S_force_version
2066  * Forces the next token to be a version number.
2067  * If the next token appears to be an invalid version number, (e.g. "v2b"),
2068  * and if "guessing" is TRUE, then no new token is created (and the caller
2069  * must use an alternative parsing method).
2070  */
2071
2072 STATIC char *
2073 S_force_version(pTHX_ char *s, int guessing)
2074 {
2075     dVAR;
2076     OP *version = NULL;
2077     char *d;
2078 #ifdef PERL_MAD
2079     I32 startoff = s - SvPVX(PL_linestr);
2080 #endif
2081
2082     PERL_ARGS_ASSERT_FORCE_VERSION;
2083
2084     s = SKIPSPACE1(s);
2085
2086     d = s;
2087     if (*d == 'v')
2088         d++;
2089     if (isDIGIT(*d)) {
2090         while (isDIGIT(*d) || *d == '_' || *d == '.')
2091             d++;
2092 #ifdef PERL_MAD
2093         if (PL_madskills) {
2094             start_force(PL_curforce);
2095             curmad('X', newSVpvn(s,d-s));
2096         }
2097 #endif
2098         if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
2099             SV *ver;
2100             s = scan_num(s, &pl_yylval);
2101             version = pl_yylval.opval;
2102             ver = cSVOPx(version)->op_sv;
2103             if (SvPOK(ver) && !SvNIOK(ver)) {
2104                 SvUPGRADE(ver, SVt_PVNV);
2105                 SvNV_set(ver, str_to_version(ver));
2106                 SvNOK_on(ver);          /* hint that it is a version */
2107             }
2108         }
2109         else if (guessing) {
2110 #ifdef PERL_MAD
2111             if (PL_madskills) {
2112                 sv_free(PL_nextwhite);  /* let next token collect whitespace */
2113                 PL_nextwhite = 0;
2114                 s = SvPVX(PL_linestr) + startoff;
2115             }
2116 #endif
2117             return s;
2118         }
2119     }
2120
2121 #ifdef PERL_MAD
2122     if (PL_madskills && !version) {
2123         sv_free(PL_nextwhite);  /* let next token collect whitespace */
2124         PL_nextwhite = 0;
2125         s = SvPVX(PL_linestr) + startoff;
2126     }
2127 #endif
2128     /* NOTE: The parser sees the package name and the VERSION swapped */
2129     start_force(PL_curforce);
2130     NEXTVAL_NEXTTOKE.opval = version;
2131     force_next(WORD);
2132
2133     return s;
2134 }
2135
2136 /*
2137  * S_tokeq
2138  * Tokenize a quoted string passed in as an SV.  It finds the next
2139  * chunk, up to end of string or a backslash.  It may make a new
2140  * SV containing that chunk (if HINT_NEW_STRING is on).  It also
2141  * turns \\ into \.
2142  */
2143
2144 STATIC SV *
2145 S_tokeq(pTHX_ SV *sv)
2146 {
2147     dVAR;
2148     register char *s;
2149     register char *send;
2150     register char *d;
2151     STRLEN len = 0;
2152     SV *pv = sv;
2153
2154     PERL_ARGS_ASSERT_TOKEQ;
2155
2156     if (!SvLEN(sv))
2157         goto finish;
2158
2159     s = SvPV_force(sv, len);
2160     if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
2161         goto finish;
2162     send = s + len;
2163     while (s < send && *s != '\\')
2164         s++;
2165     if (s == send)
2166         goto finish;
2167     d = s;
2168     if ( PL_hints & HINT_NEW_STRING ) {
2169         pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
2170     }
2171     while (s < send) {
2172         if (*s == '\\') {
2173             if (s + 1 < send && (s[1] == '\\'))
2174                 s++;            /* all that, just for this */
2175         }
2176         *d++ = *s++;
2177     }
2178     *d = '\0';
2179     SvCUR_set(sv, d - SvPVX_const(sv));
2180   finish:
2181     if ( PL_hints & HINT_NEW_STRING )
2182        return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2183     return sv;
2184 }
2185
2186 /*
2187  * Now come three functions related to double-quote context,
2188  * S_sublex_start, S_sublex_push, and S_sublex_done.  They're used when
2189  * converting things like "\u\Lgnat" into ucfirst(lc("gnat")).  They
2190  * interact with PL_lex_state, and create fake ( ... ) argument lists
2191  * to handle functions and concatenation.
2192  * They assume that whoever calls them will be setting up a fake
2193  * join call, because each subthing puts a ',' after it.  This lets
2194  *   "lower \luPpEr"
2195  * become
2196  *  join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
2197  *
2198  * (I'm not sure whether the spurious commas at the end of lcfirst's
2199  * arguments and join's arguments are created or not).
2200  */
2201
2202 /*
2203  * S_sublex_start
2204  * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2205  *
2206  * Pattern matching will set PL_lex_op to the pattern-matching op to
2207  * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2208  *
2209  * OP_CONST and OP_READLINE are easy--just make the new op and return.
2210  *
2211  * Everything else becomes a FUNC.
2212  *
2213  * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2214  * had an OP_CONST or OP_READLINE).  This just sets us up for a
2215  * call to S_sublex_push().
2216  */
2217
2218 STATIC I32
2219 S_sublex_start(pTHX)
2220 {
2221     dVAR;
2222     register const I32 op_type = pl_yylval.ival;
2223
2224     if (op_type == OP_NULL) {
2225         pl_yylval.opval = PL_lex_op;
2226         PL_lex_op = NULL;
2227         return THING;
2228     }
2229     if (op_type == OP_CONST || op_type == OP_READLINE) {
2230         SV *sv = tokeq(PL_lex_stuff);
2231
2232         if (SvTYPE(sv) == SVt_PVIV) {
2233             /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2234             STRLEN len;
2235             const char * const p = SvPV_const(sv, len);
2236             SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2237             SvREFCNT_dec(sv);
2238             sv = nsv;
2239         }
2240         pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
2241         PL_lex_stuff = NULL;
2242         /* Allow <FH> // "foo" */
2243         if (op_type == OP_READLINE)
2244             PL_expect = XTERMORDORDOR;
2245         return THING;
2246     }
2247     else if (op_type == OP_BACKTICK && PL_lex_op) {
2248         /* readpipe() vas overriden */
2249         cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
2250         pl_yylval.opval = PL_lex_op;
2251         PL_lex_op = NULL;
2252         PL_lex_stuff = NULL;
2253         return THING;
2254     }
2255
2256     PL_sublex_info.super_state = PL_lex_state;
2257     PL_sublex_info.sub_inwhat = (U16)op_type;
2258     PL_sublex_info.sub_op = PL_lex_op;
2259     PL_lex_state = LEX_INTERPPUSH;
2260
2261     PL_expect = XTERM;
2262     if (PL_lex_op) {
2263         pl_yylval.opval = PL_lex_op;
2264         PL_lex_op = NULL;
2265         return PMFUNC;
2266     }
2267     else
2268         return FUNC;
2269 }
2270
2271 /*
2272  * S_sublex_push
2273  * Create a new scope to save the lexing state.  The scope will be
2274  * ended in S_sublex_done.  Returns a '(', starting the function arguments
2275  * to the uc, lc, etc. found before.
2276  * Sets PL_lex_state to LEX_INTERPCONCAT.
2277  */
2278
2279 STATIC I32
2280 S_sublex_push(pTHX)
2281 {
2282     dVAR;
2283     ENTER;
2284
2285     PL_lex_state = PL_sublex_info.super_state;
2286     SAVEBOOL(PL_lex_dojoin);
2287     SAVEI32(PL_lex_brackets);
2288     SAVEI32(PL_lex_casemods);
2289     SAVEI32(PL_lex_starts);
2290     SAVEI8(PL_lex_state);
2291     SAVEVPTR(PL_lex_inpat);
2292     SAVEI16(PL_lex_inwhat);
2293     SAVECOPLINE(PL_curcop);
2294     SAVEPPTR(PL_bufptr);
2295     SAVEPPTR(PL_bufend);
2296     SAVEPPTR(PL_oldbufptr);
2297     SAVEPPTR(PL_oldoldbufptr);
2298     SAVEPPTR(PL_last_lop);
2299     SAVEPPTR(PL_last_uni);
2300     SAVEPPTR(PL_linestart);
2301     SAVESPTR(PL_linestr);
2302     SAVEGENERICPV(PL_lex_brackstack);
2303     SAVEGENERICPV(PL_lex_casestack);
2304
2305     PL_linestr = PL_lex_stuff;
2306     PL_lex_stuff = NULL;
2307
2308     PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2309         = SvPVX(PL_linestr);
2310     PL_bufend += SvCUR(PL_linestr);
2311     PL_last_lop = PL_last_uni = NULL;
2312     SAVEFREESV(PL_linestr);
2313
2314     PL_lex_dojoin = FALSE;
2315     PL_lex_brackets = 0;
2316     Newx(PL_lex_brackstack, 120, char);
2317     Newx(PL_lex_casestack, 12, char);
2318     PL_lex_casemods = 0;
2319     *PL_lex_casestack = '\0';
2320     PL_lex_starts = 0;
2321     PL_lex_state = LEX_INTERPCONCAT;
2322     CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2323
2324     PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2325     if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2326         PL_lex_inpat = PL_sublex_info.sub_op;
2327     else
2328         PL_lex_inpat = NULL;
2329
2330     return '(';
2331 }
2332
2333 /*
2334  * S_sublex_done
2335  * Restores lexer state after a S_sublex_push.
2336  */
2337
2338 STATIC I32
2339 S_sublex_done(pTHX)
2340 {
2341     dVAR;
2342     if (!PL_lex_starts++) {
2343         SV * const sv = newSVpvs("");
2344         if (SvUTF8(PL_linestr))
2345             SvUTF8_on(sv);
2346         PL_expect = XOPERATOR;
2347         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2348         return THING;
2349     }
2350
2351     if (PL_lex_casemods) {              /* oops, we've got some unbalanced parens */
2352         PL_lex_state = LEX_INTERPCASEMOD;
2353         return yylex();
2354     }
2355
2356     /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2357     if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2358         PL_linestr = PL_lex_repl;
2359         PL_lex_inpat = 0;
2360         PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2361         PL_bufend += SvCUR(PL_linestr);
2362         PL_last_lop = PL_last_uni = NULL;
2363         SAVEFREESV(PL_linestr);
2364         PL_lex_dojoin = FALSE;
2365         PL_lex_brackets = 0;
2366         PL_lex_casemods = 0;
2367         *PL_lex_casestack = '\0';
2368         PL_lex_starts = 0;
2369         if (SvEVALED(PL_lex_repl)) {
2370             PL_lex_state = LEX_INTERPNORMAL;
2371             PL_lex_starts++;
2372             /*  we don't clear PL_lex_repl here, so that we can check later
2373                 whether this is an evalled subst; that means we rely on the
2374                 logic to ensure sublex_done() is called again only via the
2375                 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2376         }
2377         else {
2378             PL_lex_state = LEX_INTERPCONCAT;
2379             PL_lex_repl = NULL;
2380         }
2381         return ',';
2382     }
2383     else {
2384 #ifdef PERL_MAD
2385         if (PL_madskills) {
2386             if (PL_thiswhite) {
2387                 if (!PL_endwhite)
2388                     PL_endwhite = newSVpvs("");
2389                 sv_catsv(PL_endwhite, PL_thiswhite);
2390                 PL_thiswhite = 0;
2391             }
2392             if (PL_thistoken)
2393                 sv_setpvs(PL_thistoken,"");
2394             else
2395                 PL_realtokenstart = -1;
2396         }
2397 #endif
2398         LEAVE;
2399         PL_bufend = SvPVX(PL_linestr);
2400         PL_bufend += SvCUR(PL_linestr);
2401         PL_expect = XOPERATOR;
2402         PL_sublex_info.sub_inwhat = 0;
2403         return ')';
2404     }
2405 }
2406
2407 /*
2408   scan_const
2409
2410   Extracts a pattern, double-quoted string, or transliteration.  This
2411   is terrifying code.
2412
2413   It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2414   processing a pattern (PL_lex_inpat is true), a transliteration
2415   (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2416
2417   Returns a pointer to the character scanned up to. If this is
2418   advanced from the start pointer supplied (i.e. if anything was
2419   successfully parsed), will leave an OP for the substring scanned
2420   in pl_yylval. Caller must intuit reason for not parsing further
2421   by looking at the next characters herself.
2422
2423   In patterns:
2424     backslashes:
2425       double-quoted style: \r and \n
2426       regexp special ones: \D \s
2427       constants: \x31
2428       backrefs: \1
2429       case and quoting: \U \Q \E
2430     stops on @ and $, but not for $ as tail anchor
2431
2432   In transliterations:
2433     characters are VERY literal, except for - not at the start or end
2434     of the string, which indicates a range. If the range is in bytes,
2435     scan_const expands the range to the full set of intermediate
2436     characters. If the range is in utf8, the hyphen is replaced with
2437     a certain range mark which will be handled by pmtrans() in op.c.
2438
2439   In double-quoted strings:
2440     backslashes:
2441       double-quoted style: \r and \n
2442       constants: \x31
2443       deprecated backrefs: \1 (in substitution replacements)
2444       case and quoting: \U \Q \E
2445     stops on @ and $
2446
2447   scan_const does *not* construct ops to handle interpolated strings.
2448   It stops processing as soon as it finds an embedded $ or @ variable
2449   and leaves it to the caller to work out what's going on.
2450
2451   embedded arrays (whether in pattern or not) could be:
2452       @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2453
2454   $ in double-quoted strings must be the symbol of an embedded scalar.
2455
2456   $ in pattern could be $foo or could be tail anchor.  Assumption:
2457   it's a tail anchor if $ is the last thing in the string, or if it's
2458   followed by one of "()| \r\n\t"
2459
2460   \1 (backreferences) are turned into $1
2461
2462   The structure of the code is
2463       while (there's a character to process) {
2464           handle transliteration ranges
2465           skip regexp comments /(?#comment)/ and codes /(?{code})/
2466           skip #-initiated comments in //x patterns
2467           check for embedded arrays
2468           check for embedded scalars
2469           if (backslash) {
2470               leave intact backslashes from leaveit (below)
2471               deprecate \1 in substitution replacements
2472               handle string-changing backslashes \l \U \Q \E, etc.
2473               switch (what was escaped) {
2474                   handle \- in a transliteration (becomes a literal -)
2475                   handle \132 (octal characters)
2476                   handle \x15 and \x{1234} (hex characters)
2477                   handle \N{name} (named characters)
2478                   handle \cV (control characters)
2479                   handle printf-style backslashes (\f, \r, \n, etc)
2480               } (end switch)
2481               continue
2482           } (end if backslash)
2483           handle regular character
2484     } (end while character to read)
2485
2486 */
2487
2488 STATIC char *
2489 S_scan_const(pTHX_ char *start)
2490 {
2491     dVAR;
2492     register char *send = PL_bufend;            /* end of the constant */
2493     SV *sv = newSV(send - start);               /* sv for the constant.  See
2494                                                    note below on sizing. */
2495     register char *s = start;                   /* start of the constant */
2496     register char *d = SvPVX(sv);               /* destination for copies */
2497     bool dorange = FALSE;                       /* are we in a translit range? */
2498     bool didrange = FALSE;                      /* did we just finish a range? */
2499     I32  has_utf8 = FALSE;                      /* Output constant is UTF8 */
2500     I32  this_utf8 = UTF;                       /* Is the source string assumed
2501                                                    to be UTF8?  But, this can
2502                                                    show as true when the source
2503                                                    isn't utf8, as for example
2504                                                    when it is entirely composed
2505                                                    of hex constants */
2506
2507     /* Note on sizing:  The scanned constant is placed into sv, which is
2508      * initialized by newSV() assuming one byte of output for every byte of
2509      * input.  This routine expects newSV() to allocate an extra byte for a
2510      * trailing NUL, which this routine will append if it gets to the end of
2511      * the input.  There may be more bytes of input than output (eg., \N{LATIN
2512      * CAPITAL LETTER A}), or more output than input if the constant ends up
2513      * recoded to utf8, but each time a construct is found that might increase
2514      * the needed size, SvGROW() is called.  Its size parameter each time is
2515      * based on the best guess estimate at the time, namely the length used so
2516      * far, plus the length the current construct will occupy, plus room for
2517      * the trailing NUL, plus one byte for every input byte still unscanned */
2518
2519     UV uv;
2520 #ifdef EBCDIC
2521     UV literal_endpoint = 0;
2522     bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
2523 #endif
2524
2525     PERL_ARGS_ASSERT_SCAN_CONST;
2526
2527     if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2528         /* If we are doing a trans and we know we want UTF8 set expectation */
2529         has_utf8   = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2530         this_utf8  = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2531     }
2532
2533
2534     while (s < send || dorange) {
2535         /* get transliterations out of the way (they're most literal) */
2536         if (PL_lex_inwhat == OP_TRANS) {
2537             /* expand a range A-Z to the full set of characters.  AIE! */
2538             if (dorange) {
2539                 I32 i;                          /* current expanded character */
2540                 I32 min;                        /* first character in range */
2541                 I32 max;                        /* last character in range */
2542
2543 #ifdef EBCDIC
2544                 UV uvmax = 0;
2545 #endif
2546
2547                 if (has_utf8
2548 #ifdef EBCDIC
2549                     && !native_range
2550 #endif
2551                     ) {
2552                     char * const c = (char*)utf8_hop((U8*)d, -1);
2553                     char *e = d++;
2554                     while (e-- > c)
2555                         *(e + 1) = *e;
2556                     *c = (char)UTF_TO_NATIVE(0xff);
2557                     /* mark the range as done, and continue */
2558                     dorange = FALSE;
2559                     didrange = TRUE;
2560                     continue;
2561                 }
2562
2563                 i = d - SvPVX_const(sv);                /* remember current offset */
2564 #ifdef EBCDIC
2565                 SvGROW(sv,
2566                        SvLEN(sv) + (has_utf8 ?
2567                                     (512 - UTF_CONTINUATION_MARK +
2568                                      UNISKIP(0x100))
2569                                     : 256));
2570                 /* How many two-byte within 0..255: 128 in UTF-8,
2571                  * 96 in UTF-8-mod. */
2572 #else
2573                 SvGROW(sv, SvLEN(sv) + 256);    /* never more than 256 chars in a range */
2574 #endif
2575                 d = SvPVX(sv) + i;              /* refresh d after realloc */
2576 #ifdef EBCDIC
2577                 if (has_utf8) {
2578                     int j;
2579                     for (j = 0; j <= 1; j++) {
2580                         char * const c = (char*)utf8_hop((U8*)d, -1);
2581                         const UV uv    = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2582                         if (j)
2583                             min = (U8)uv;
2584                         else if (uv < 256)
2585                             max = (U8)uv;
2586                         else {
2587                             max = (U8)0xff; /* only to \xff */
2588                             uvmax = uv; /* \x{100} to uvmax */
2589                         }
2590                         d = c; /* eat endpoint chars */
2591                      }
2592                 }
2593                else {
2594 #endif
2595                    d -= 2;              /* eat the first char and the - */
2596                    min = (U8)*d;        /* first char in range */
2597                    max = (U8)d[1];      /* last char in range  */
2598 #ifdef EBCDIC
2599                }
2600 #endif
2601
2602                 if (min > max) {
2603                     Perl_croak(aTHX_
2604                                "Invalid range \"%c-%c\" in transliteration operator",
2605                                (char)min, (char)max);
2606                 }
2607
2608 #ifdef EBCDIC
2609                 if (literal_endpoint == 2 &&
2610                     ((isLOWER(min) && isLOWER(max)) ||
2611                      (isUPPER(min) && isUPPER(max)))) {
2612                     if (isLOWER(min)) {
2613                         for (i = min; i <= max; i++)
2614                             if (isLOWER(i))
2615                                 *d++ = NATIVE_TO_NEED(has_utf8,i);
2616                     } else {
2617                         for (i = min; i <= max; i++)
2618                             if (isUPPER(i))
2619                                 *d++ = NATIVE_TO_NEED(has_utf8,i);
2620                     }
2621                 }
2622                 else
2623 #endif
2624                     for (i = min; i <= max; i++)
2625 #ifdef EBCDIC
2626                         if (has_utf8) {
2627                             const U8 ch = (U8)NATIVE_TO_UTF(i);
2628                             if (UNI_IS_INVARIANT(ch))
2629                                 *d++ = (U8)i;
2630                             else {
2631                                 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2632                                 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2633                             }
2634                         }
2635                         else
2636 #endif
2637                             *d++ = (char)i;
2638
2639 #ifdef EBCDIC
2640                 if (uvmax) {
2641                     d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2642                     if (uvmax > 0x101)
2643                         *d++ = (char)UTF_TO_NATIVE(0xff);
2644                     if (uvmax > 0x100)
2645                         d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2646                 }
2647 #endif
2648
2649                 /* mark the range as done, and continue */
2650                 dorange = FALSE;
2651                 didrange = TRUE;
2652 #ifdef EBCDIC
2653                 literal_endpoint = 0;
2654 #endif
2655                 continue;
2656             }
2657
2658             /* range begins (ignore - as first or last char) */
2659             else if (*s == '-' && s+1 < send  && s != start) {
2660                 if (didrange) {
2661                     Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
2662                 }
2663                 if (has_utf8
2664 #ifdef EBCDIC
2665                     && !native_range
2666 #endif
2667                     ) {
2668                     *d++ = (char)UTF_TO_NATIVE(0xff);   /* use illegal utf8 byte--see pmtrans */
2669                     s++;
2670                     continue;
2671                 }
2672                 dorange = TRUE;
2673                 s++;
2674             }
2675             else {
2676                 didrange = FALSE;
2677 #ifdef EBCDIC
2678                 literal_endpoint = 0;
2679                 native_range = TRUE;
2680 #endif
2681             }
2682         }
2683
2684         /* if we get here, we're not doing a transliteration */
2685
2686         /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2687            except for the last char, which will be done separately. */
2688         else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
2689             if (s[2] == '#') {
2690                 while (s+1 < send && *s != ')')
2691                     *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2692             }
2693             else if (s[2] == '{' /* This should match regcomp.c */
2694                     || (s[2] == '?' && s[3] == '{'))
2695             {
2696                 I32 count = 1;
2697                 char *regparse = s + (s[2] == '{' ? 3 : 4);
2698                 char c;
2699
2700                 while (count && (c = *regparse)) {
2701                     if (c == '\\' && regparse[1])
2702                         regparse++;
2703                     else if (c == '{')
2704                         count++;
2705                     else if (c == '}')
2706                         count--;
2707                     regparse++;
2708                 }
2709                 if (*regparse != ')')
2710                     regparse--;         /* Leave one char for continuation. */
2711                 while (s < regparse)
2712                     *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2713             }
2714         }
2715
2716         /* likewise skip #-initiated comments in //x patterns */
2717         else if (*s == '#' && PL_lex_inpat &&
2718           ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
2719             while (s+1 < send && *s != '\n')
2720                 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2721         }
2722
2723         /* check for embedded arrays
2724            (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
2725            */
2726         else if (*s == '@' && s[1]) {
2727             if (isALNUM_lazy_if(s+1,UTF))
2728                 break;
2729             if (strchr(":'{$", s[1]))
2730                 break;
2731             if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2732                 break; /* in regexp, neither @+ nor @- are interpolated */
2733         }
2734
2735         /* check for embedded scalars.  only stop if we're sure it's a
2736            variable.
2737         */
2738         else if (*s == '$') {
2739             if (!PL_lex_inpat)  /* not a regexp, so $ must be var */
2740                 break;
2741             if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
2742                 if (s[1] == '\\') {
2743                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2744                                    "Possible unintended interpolation of $\\ in regex");
2745                 }
2746                 break;          /* in regexp, $ might be tail anchor */
2747             }
2748         }
2749
2750         /* End of else if chain - OP_TRANS rejoin rest */
2751
2752         /* backslashes */
2753         if (*s == '\\' && s+1 < send) {
2754             s++;
2755
2756             /* deprecate \1 in strings and substitution replacements */
2757             if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
2758                 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
2759             {
2760                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
2761                 *--s = '$';
2762                 break;
2763             }
2764
2765             /* string-change backslash escapes */
2766             if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
2767                 --s;
2768                 break;
2769             }
2770             /* skip any other backslash escapes in a pattern */
2771             else if (PL_lex_inpat) {
2772                 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2773                 goto default_action;
2774             }
2775
2776             /* if we get here, it's either a quoted -, or a digit */
2777             switch (*s) {
2778
2779             /* quoted - in transliterations */
2780             case '-':
2781                 if (PL_lex_inwhat == OP_TRANS) {
2782                     *d++ = *s++;
2783                     continue;
2784                 }
2785                 /* FALL THROUGH */
2786             default:
2787                 {
2788                     if ((isALPHA(*s) || isDIGIT(*s)))
2789                         Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
2790                                        "Unrecognized escape \\%c passed through",
2791                                        *s);
2792                     /* default action is to copy the quoted character */
2793                     goto default_action;
2794                 }
2795
2796             /* eg. \132 indicates the octal constant 0x132 */
2797             case '0': case '1': case '2': case '3':
2798             case '4': case '5': case '6': case '7':
2799                 {
2800                     I32 flags = 0;
2801                     STRLEN len = 3;
2802                     uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
2803                     s += len;
2804                 }
2805                 goto NUM_ESCAPE_INSERT;
2806
2807             /* eg. \x24 indicates the hex constant 0x24 */
2808             case 'x':
2809                 ++s;
2810                 if (*s == '{') {
2811                     char* const e = strchr(s, '}');
2812                     I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2813                       PERL_SCAN_DISALLOW_PREFIX;
2814                     STRLEN len;
2815
2816                     ++s;
2817                     if (!e) {
2818                         yyerror("Missing right brace on \\x{}");
2819                         continue;
2820                     }
2821                     len = e - s;
2822                     uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
2823                     s = e + 1;
2824                 }
2825                 else {
2826                     {
2827                         STRLEN len = 2;
2828                         I32 flags = PERL_SCAN_DISALLOW_PREFIX;
2829                         uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
2830                         s += len;
2831                     }
2832                 }
2833
2834               NUM_ESCAPE_INSERT:
2835                 /* Insert oct, hex, or \N{U+...} escaped character.  There will
2836                  * always be enough room in sv since such escapes will be
2837                  * longer than any UTF-8 sequence they can end up as, except if
2838                  * they force us to recode the rest of the string into utf8 */
2839
2840                 /* Here uv is the ordinal of the next character being added in
2841                  * unicode (converted from native).  (It has to be done before
2842                  * here because \N is interpreted as unicode, and oct and hex
2843                  * as native.) */
2844                 if (!UNI_IS_INVARIANT(uv)) {
2845                     if (!has_utf8 && uv > 255) {
2846                         /* Might need to recode whatever we have accumulated so
2847                          * far if it contains any chars variant in utf8 or
2848                          * utf-ebcdic. */
2849
2850                         SvCUR_set(sv, d - SvPVX_const(sv));
2851                         SvPOK_on(sv);
2852                         *d = '\0';
2853                         /* See Note on sizing above.  */
2854                         sv_utf8_upgrade_flags_grow(sv,
2855                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
2856                                         UNISKIP(uv) + (STRLEN)(send - s) + 1);
2857                         d = SvPVX(sv) + SvCUR(sv);
2858                         has_utf8 = TRUE;
2859                     }
2860
2861                     if (has_utf8) {
2862                         d = (char*)uvuni_to_utf8((U8*)d, uv);
2863                         if (PL_lex_inwhat == OP_TRANS &&
2864                             PL_sublex_info.sub_op) {
2865                             PL_sublex_info.sub_op->op_private |=
2866                                 (PL_lex_repl ? OPpTRANS_FROM_UTF
2867                                              : OPpTRANS_TO_UTF);
2868                         }
2869 #ifdef EBCDIC
2870                         if (uv > 255 && !dorange)
2871                             native_range = FALSE;
2872 #endif
2873                     }
2874                     else {
2875                         *d++ = (char)uv;
2876                     }
2877                 }
2878                 else {
2879                     *d++ = (char) uv;
2880                 }
2881                 continue;
2882
2883             /* \N{LATIN SMALL LETTER A} is a named character, and so is
2884              * \N{U+0041} */
2885             case 'N':
2886                 ++s;
2887                 if (*s == '{') {
2888                     char* e = strchr(s, '}');
2889                     SV *res;
2890                     STRLEN len;
2891                     const char *str;
2892
2893                     if (!e) {
2894                         yyerror("Missing right brace on \\N{}");
2895                         e = s - 1;
2896                         goto cont_scan;
2897                     }
2898                     if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
2899                         /* \N{U+...} The ... is a unicode value even on EBCDIC
2900                          * machines */
2901                         I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2902                           PERL_SCAN_DISALLOW_PREFIX;
2903                         s += 3;
2904                         len = e - s;
2905                         uv = grok_hex(s, &len, &flags, NULL);
2906                         if ( e > s && len != (STRLEN)(e - s) ) {
2907                             uv = 0xFFFD;
2908                         }
2909                         s = e + 1;
2910                         goto NUM_ESCAPE_INSERT;
2911                     }
2912                     res = newSVpvn(s + 1, e - s - 1);
2913                     res = new_constant( NULL, 0, "charnames",
2914                                         res, NULL, s - 2, e - s + 3 );
2915                     if (has_utf8)
2916                         sv_utf8_upgrade(res);
2917                     str = SvPV_const(res,len);
2918 #ifdef EBCDIC_NEVER_MIND
2919                     /* charnames uses pack U and that has been
2920                      * recently changed to do the below uni->native
2921                      * mapping, so this would be redundant (and wrong,
2922                      * the code point would be doubly converted).
2923                      * But leave this in just in case the pack U change
2924                      * gets revoked, but the semantics is still
2925                      * desireable for charnames. --jhi */
2926                     {
2927                          UV uv = utf8_to_uvchr((const U8*)str, 0);
2928
2929                          if (uv < 0x100) {
2930                               U8 tmpbuf[UTF8_MAXBYTES+1], *d;
2931
2932                               d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
2933                               sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
2934                               str = SvPV_const(res, len);
2935                          }
2936                     }
2937 #endif
2938                     /* If destination is not in utf8 but this new character is,
2939                      * recode the dest to utf8 */
2940                     if (!has_utf8 && SvUTF8(res)) {
2941                         SvCUR_set(sv, d - SvPVX_const(sv));
2942                         SvPOK_on(sv);
2943                         *d = '\0';
2944                         /* See Note on sizing above.  */
2945                         sv_utf8_upgrade_flags_grow(sv,
2946                                             SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
2947                                             len + (STRLEN)(send - s) + 1);
2948                         d = SvPVX(sv) + SvCUR(sv);
2949                         has_utf8 = TRUE;
2950                     } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
2951
2952                         /* See Note on sizing above.  (NOTE: SvCUR() is not set
2953                          * correctly here). */
2954                         const STRLEN off = d - SvPVX_const(sv);
2955                         d = SvGROW(sv, off + len + (STRLEN)(send - s) + 1) + off;
2956                     }
2957 #ifdef EBCDIC
2958                     if (!dorange)
2959                         native_range = FALSE; /* \N{} is guessed to be Unicode */
2960 #endif
2961                     Copy(str, d, len, char);
2962                     d += len;
2963                     SvREFCNT_dec(res);
2964                   cont_scan:
2965                     s = e + 1;
2966                 }
2967                 else
2968                     yyerror("Missing braces on \\N{}");
2969                 continue;
2970
2971             /* \c is a control character */
2972             case 'c':
2973                 s++;
2974                 if (s < send) {
2975                     U8 c = *s++;
2976 #ifdef EBCDIC
2977                     if (isLOWER(c))
2978                         c = toUPPER(c);
2979 #endif
2980                     *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
2981                 }
2982                 else {
2983                     yyerror("Missing control char name in \\c");
2984                 }
2985                 continue;
2986
2987             /* printf-style backslashes, formfeeds, newlines, etc */
2988             case 'b':
2989                 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
2990                 break;
2991             case 'n':
2992                 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
2993                 break;
2994             case 'r':
2995                 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
2996                 break;
2997             case 'f':
2998                 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
2999                 break;
3000             case 't':
3001                 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
3002                 break;
3003             case 'e':
3004                 *d++ = ASCII_TO_NEED(has_utf8,'\033');
3005                 break;
3006             case 'a':
3007                 *d++ = ASCII_TO_NEED(has_utf8,'\007');
3008                 break;
3009             } /* end switch */
3010
3011             s++;
3012             continue;
3013         } /* end if (backslash) */
3014 #ifdef EBCDIC
3015         else
3016             literal_endpoint++;
3017 #endif
3018
3019     default_action:
3020         /* If we started with encoded form, or already know we want it,
3021            then encode the next character */
3022         if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
3023             STRLEN len  = 1;
3024
3025
3026             /* One might think that it is wasted effort in the case of the
3027              * source being utf8 (this_utf8 == TRUE) to take the next character
3028              * in the source, convert it to an unsigned value, and then convert
3029              * it back again.  But the source has not been validated here.  The
3030              * routine that does the conversion checks for errors like
3031              * malformed utf8 */
3032
3033             const UV nextuv   = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3034             const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
3035             if (!has_utf8) {
3036                 SvCUR_set(sv, d - SvPVX_const(sv));
3037                 SvPOK_on(sv);
3038                 *d = '\0';
3039                 /* See Note on sizing above.  */
3040                 sv_utf8_upgrade_flags_grow(sv,
3041                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3042                                         need + (STRLEN)(send - s) + 1);
3043                 d = SvPVX(sv) + SvCUR(sv);
3044                 has_utf8 = TRUE;
3045             } else if (need > len) {
3046                 /* encoded value larger than old, may need extra space (NOTE:
3047                  * SvCUR() is not set correctly here).   See Note on sizing
3048                  * above.  */
3049                 const STRLEN off = d - SvPVX_const(sv);
3050                 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
3051             }
3052             s += len;
3053
3054             d = (char*)uvchr_to_utf8((U8*)d, nextuv);
3055 #ifdef EBCDIC
3056             if (uv > 255 && !dorange)
3057                 native_range = FALSE;
3058 #endif
3059         }
3060         else {
3061             *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3062         }
3063     } /* while loop to process each character */
3064
3065     /* terminate the string and set up the sv */
3066     *d = '\0';
3067     SvCUR_set(sv, d - SvPVX_const(sv));
3068     if (SvCUR(sv) >= SvLEN(sv))
3069         Perl_croak(aTHX_ "panic: constant overflowed allocated space");
3070
3071     SvPOK_on(sv);
3072     if (PL_encoding && !has_utf8) {
3073         sv_recode_to_utf8(sv, PL_encoding);
3074         if (SvUTF8(sv))
3075             has_utf8 = TRUE;
3076     }
3077     if (has_utf8) {
3078         SvUTF8_on(sv);
3079         if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3080             PL_sublex_info.sub_op->op_private |=
3081                     (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3082         }
3083     }
3084
3085     /* shrink the sv if we allocated more than we used */
3086     if (SvCUR(sv) + 5 < SvLEN(sv)) {
3087         SvPV_shrink_to_cur(sv);
3088     }
3089
3090     /* return the substring (via pl_yylval) only if we parsed anything */
3091     if (s > PL_bufptr) {
3092         if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
3093             const char *const key = PL_lex_inpat ? "qr" : "q";
3094             const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3095             const char *type;
3096             STRLEN typelen;
3097
3098             if (PL_lex_inwhat == OP_TRANS) {
3099                 type = "tr";
3100                 typelen = 2;
3101             } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3102                 type = "s";
3103                 typelen = 1;
3104             } else  {
3105                 type = "qq";
3106                 typelen = 2;
3107             }
3108
3109             sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3110                                 type, typelen);
3111         }
3112         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3113     } else
3114         SvREFCNT_dec(sv);
3115     return s;
3116 }
3117
3118 /* S_intuit_more
3119  * Returns TRUE if there's more to the expression (e.g., a subscript),
3120  * FALSE otherwise.
3121  *
3122  * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3123  *
3124  * ->[ and ->{ return TRUE
3125  * { and [ outside a pattern are always subscripts, so return TRUE
3126  * if we're outside a pattern and it's not { or [, then return FALSE
3127  * if we're in a pattern and the first char is a {
3128  *   {4,5} (any digits around the comma) returns FALSE
3129  * if we're in a pattern and the first char is a [
3130  *   [] returns FALSE
3131  *   [SOMETHING] has a funky algorithm to decide whether it's a
3132  *      character class or not.  It has to deal with things like
3133  *      /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3134  * anything else returns TRUE
3135  */
3136
3137 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
3138
3139 STATIC int
3140 S_intuit_more(pTHX_ register char *s)
3141 {
3142     dVAR;
3143
3144     PERL_ARGS_ASSERT_INTUIT_MORE;
3145
3146     if (PL_lex_brackets)
3147         return TRUE;
3148     if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3149         return TRUE;
3150     if (*s != '{' && *s != '[')
3151         return FALSE;
3152     if (!PL_lex_inpat)
3153         return TRUE;
3154
3155     /* In a pattern, so maybe we have {n,m}. */
3156     if (*s == '{') {
3157         s++;
3158         if (!isDIGIT(*s))
3159             return TRUE;
3160         while (isDIGIT(*s))
3161             s++;
3162         if (*s == ',')
3163             s++;
3164         while (isDIGIT(*s))
3165             s++;
3166         if (*s == '}')
3167             return FALSE;
3168         return TRUE;
3169
3170     }
3171
3172     /* On the other hand, maybe we have a character class */
3173
3174     s++;
3175     if (*s == ']' || *s == '^')
3176         return FALSE;
3177     else {
3178         /* this is terrifying, and it works */
3179         int weight = 2;         /* let's weigh the evidence */
3180         char seen[256];
3181         unsigned char un_char = 255, last_un_char;
3182         const char * const send = strchr(s,']');
3183         char tmpbuf[sizeof PL_tokenbuf * 4];
3184
3185         if (!send)              /* has to be an expression */
3186             return TRUE;
3187
3188         Zero(seen,256,char);
3189         if (*s == '$')
3190             weight -= 3;
3191         else if (isDIGIT(*s)) {
3192             if (s[1] != ']') {
3193                 if (isDIGIT(s[1]) && s[2] == ']')
3194                     weight -= 10;
3195             }
3196             else
3197                 weight -= 100;
3198         }
3199         for (; s < send; s++) {
3200             last_un_char = un_char;
3201             un_char = (unsigned char)*s;
3202             switch (*s) {
3203             case '@':
3204             case '&':
3205             case '$':
3206                 weight -= seen[un_char] * 10;
3207                 if (isALNUM_lazy_if(s+1,UTF)) {
3208                     int len;
3209                     scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
3210                     len = (int)strlen(tmpbuf);
3211                     if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
3212                         weight -= 100;
3213                     else
3214                         weight -= 10;
3215                 }
3216                 else if (*s == '$' && s[1] &&
3217                   strchr("[#!%*<>()-=",s[1])) {
3218                     if (/*{*/ strchr("])} =",s[2]))
3219                         weight -= 10;
3220                     else
3221                         weight -= 1;
3222                 }
3223                 break;
3224             case '\\':
3225                 un_char = 254;
3226                 if (s[1]) {
3227                     if (strchr("wds]",s[1]))
3228                         weight += 100;
3229                     else if (seen[(U8)'\''] || seen[(U8)'"'])
3230                         weight += 1;
3231                     else if (strchr("rnftbxcav",s[1]))
3232                         weight += 40;
3233                     else if (isDIGIT(s[1])) {
3234                         weight += 40;
3235                         while (s[1] && isDIGIT(s[1]))
3236                             s++;
3237                     }
3238                 }
3239                 else
3240                     weight += 100;
3241                 break;
3242             case '-':
3243                 if (s[1] == '\\')
3244                     weight += 50;
3245                 if (strchr("aA01! ",last_un_char))
3246                     weight += 30;
3247                 if (strchr("zZ79~",s[1]))
3248                     weight += 30;
3249                 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3250                     weight -= 5;        /* cope with negative subscript */
3251                 break;
3252             default:
3253                 if (!isALNUM(last_un_char)
3254                     && !(last_un_char == '$' || last_un_char == '@'
3255                          || last_un_char == '&')
3256                     && isALPHA(*s) && s[1] && isALPHA(s[1])) {
3257                     char *d = tmpbuf;
3258                     while (isALPHA(*s))
3259                         *d++ = *s++;
3260                     *d = '\0';
3261                     if (keyword(tmpbuf, d - tmpbuf, 0))
3262                         weight -= 150;
3263                 }
3264                 if (un_char == last_un_char + 1)
3265                     weight += 5;
3266                 weight -= seen[un_char];
3267                 break;
3268             }
3269             seen[un_char]++;
3270         }
3271         if (weight >= 0)        /* probably a character class */
3272             return FALSE;
3273     }
3274
3275     return TRUE;
3276 }
3277
3278 /*
3279  * S_intuit_method
3280  *
3281  * Does all the checking to disambiguate
3282  *   foo bar
3283  * between foo(bar) and bar->foo.  Returns 0 if not a method, otherwise
3284  * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3285  *
3286  * First argument is the stuff after the first token, e.g. "bar".
3287  *
3288  * Not a method if bar is a filehandle.
3289  * Not a method if foo is a subroutine prototyped to take a filehandle.
3290  * Not a method if it's really "Foo $bar"
3291  * Method if it's "foo $bar"
3292  * Not a method if it's really "print foo $bar"
3293  * Method if it's really "foo package::" (interpreted as package->foo)
3294  * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3295  * Not a method if bar is a filehandle or package, but is quoted with
3296  *   =>
3297  */
3298
3299 STATIC int
3300 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
3301 {
3302     dVAR;
3303     char *s = start + (*start == '$');
3304     char tmpbuf[sizeof PL_tokenbuf];
3305     STRLEN len;
3306     GV* indirgv;
3307 #ifdef PERL_MAD
3308     int soff;
3309 #endif
3310
3311     PERL_ARGS_ASSERT_INTUIT_METHOD;
3312
3313     if (gv) {
3314         if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
3315             return 0;
3316         if (cv) {
3317             if (SvPOK(cv)) {
3318                 const char *proto = SvPVX_const(cv);
3319                 if (proto) {
3320                     if (*proto == ';')
3321                         proto++;
3322                     if (*proto == '*')
3323                         return 0;
3324                 }
3325             }
3326         } else
3327             gv = NULL;
3328     }
3329     s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
3330     /* start is the beginning of the possible filehandle/object,
3331      * and s is the end of it
3332      * tmpbuf is a copy of it
3333      */
3334
3335     if (*start == '$') {
3336         if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3337                 isUPPER(*PL_tokenbuf))
3338             return 0;
3339 #ifdef PERL_MAD
3340         len = start - SvPVX(PL_linestr);
3341 #endif
3342         s = PEEKSPACE(s);
3343 #ifdef PERL_MAD
3344         start = SvPVX(PL_linestr) + len;
3345 #endif
3346         PL_bufptr = start;
3347         PL_expect = XREF;
3348         return *s == '(' ? FUNCMETH : METHOD;
3349     }
3350     if (!keyword(tmpbuf, len, 0)) {
3351         if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
3352             len -= 2;
3353             tmpbuf[len] = '\0';
3354 #ifdef PERL_MAD
3355             soff = s - SvPVX(PL_linestr);
3356 #endif
3357             goto bare_package;
3358         }
3359         indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
3360         if (indirgv && GvCVu(indirgv))
3361             return 0;
3362         /* filehandle or package name makes it a method */
3363         if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
3364 #ifdef PERL_MAD
3365             soff = s - SvPVX(PL_linestr);
3366 #endif
3367             s = PEEKSPACE(s);
3368             if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
3369                 return 0;       /* no assumptions -- "=>" quotes bearword */
3370       bare_package:
3371             start_force(PL_curforce);
3372             NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
3373                                                   S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
3374             NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
3375             if (PL_madskills)
3376                 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
3377             PL_expect = XTERM;
3378             force_next(WORD);
3379             PL_bufptr = s;
3380 #ifdef PERL_MAD
3381             PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
3382 #endif
3383             return *s == '(' ? FUNCMETH : METHOD;
3384         }
3385     }
3386     return 0;
3387 }
3388
3389 /* Encoded script support. filter_add() effectively inserts a
3390  * 'pre-processing' function into the current source input stream.
3391  * Note that the filter function only applies to the current source file
3392  * (e.g., it will not affect files 'require'd or 'use'd by this one).
3393  *
3394  * The datasv parameter (which may be NULL) can be used to pass
3395  * private data to this instance of the filter. The filter function
3396  * can recover the SV using the FILTER_DATA macro and use it to
3397  * store private buffers and state information.
3398  *
3399  * The supplied datasv parameter is upgraded to a PVIO type
3400  * and the IoDIRP/IoANY field is used to store the function pointer,
3401  * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
3402  * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
3403  * private use must be set using malloc'd pointers.
3404  */
3405
3406 SV *
3407 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
3408 {
3409     dVAR;
3410     if (!funcp)
3411         return NULL;
3412
3413     if (!PL_parser)
3414         return NULL;
3415
3416     if (!PL_rsfp_filters)
3417         PL_rsfp_filters = newAV();
3418     if (!datasv)
3419         datasv = newSV(0);
3420     SvUPGRADE(datasv, SVt_PVIO);
3421     IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
3422     IoFLAGS(datasv) |= IOf_FAKE_DIRP;
3423     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
3424                           FPTR2DPTR(void *, IoANY(datasv)),
3425                           SvPV_nolen(datasv)));
3426     av_unshift(PL_rsfp_filters, 1);
3427     av_store(PL_rsfp_filters, 0, datasv) ;
3428     return(datasv);
3429 }
3430
3431
3432 /* Delete most recently added instance of this filter function. */
3433 void
3434 Perl_filter_del(pTHX_ filter_t funcp)
3435 {
3436     dVAR;
3437     SV *datasv;
3438
3439     PERL_ARGS_ASSERT_FILTER_DEL;
3440
3441 #ifdef DEBUGGING
3442     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
3443                           FPTR2DPTR(void*, funcp)));
3444 #endif
3445     if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
3446         return;
3447     /* if filter is on top of stack (usual case) just pop it off */
3448     datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
3449     if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
3450         IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
3451         IoANY(datasv) = (void *)NULL;
3452         sv_free(av_pop(PL_rsfp_filters));
3453
3454         return;
3455     }
3456     /* we need to search for the correct entry and clear it     */
3457     Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
3458 }
3459
3460
3461 /* Invoke the idxth filter function for the current rsfp.        */
3462 /* maxlen 0 = read one text line */
3463 I32
3464 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
3465 {
3466     dVAR;
3467     filter_t funcp;
3468     SV *datasv = NULL;
3469     /* This API is bad. It should have been using unsigned int for maxlen.
3470        Not sure if we want to change the API, but if not we should sanity
3471        check the value here.  */
3472     const unsigned int correct_length
3473         = maxlen < 0 ?
3474 #ifdef PERL_MICRO
3475         0x7FFFFFFF
3476 #else
3477         INT_MAX
3478 #endif
3479         : maxlen;
3480
3481     PERL_ARGS_ASSERT_FILTER_READ;
3482
3483     if (!PL_parser || !PL_rsfp_filters)
3484         return -1;
3485     if (idx > AvFILLp(PL_rsfp_filters)) {       /* Any more filters?    */
3486         /* Provide a default input filter to make life easy.    */
3487         /* Note that we append to the line. This is handy.      */
3488         DEBUG_P(PerlIO_printf(Perl_debug_log,
3489                               "filter_read %d: from rsfp\n", idx));
3490         if (correct_length) {
3491             /* Want a block */
3492             int len ;
3493             const int old_len = SvCUR(buf_sv);
3494
3495             /* ensure buf_sv is large enough */
3496             SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
3497             if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
3498                                    correct_length)) <= 0) {
3499                 if (PerlIO_error(PL_rsfp))
3500                     return -1;          /* error */
3501                 else
3502                     return 0 ;          /* end of file */
3503             }
3504             SvCUR_set(buf_sv, old_len + len) ;
3505             SvPVX(buf_sv)[old_len + len] = '\0';
3506         } else {
3507             /* Want a line */
3508             if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
3509                 if (PerlIO_error(PL_rsfp))
3510                     return -1;          /* error */
3511                 else
3512                     return 0 ;          /* end of file */
3513             }
3514         }
3515         return SvCUR(buf_sv);
3516     }
3517     /* Skip this filter slot if filter has been deleted */
3518     if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
3519         DEBUG_P(PerlIO_printf(Perl_debug_log,
3520                               "filter_read %d: skipped (filter deleted)\n",
3521                               idx));
3522         return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
3523     }
3524     /* Get function pointer hidden within datasv        */
3525     funcp = DPTR2FPTR(filter_t, IoANY(datasv));
3526     DEBUG_P(PerlIO_printf(Perl_debug_log,
3527                           "filter_read %d: via function %p (%s)\n",
3528                           idx, (void*)datasv, SvPV_nolen_const(datasv)));
3529     /* Call function. The function is expected to       */
3530     /* call "FILTER_READ(idx+1, buf_sv)" first.         */
3531     /* Return: <0:error, =0:eof, >0:not eof             */
3532     return (*funcp)(aTHX_ idx, buf_sv, correct_length);
3533 }
3534
3535 STATIC char *
3536 S_filter_gets(pTHX_ register SV *sv, STRLEN append)
3537 {
3538     dVAR;
3539
3540     PERL_ARGS_ASSERT_FILTER_GETS;
3541
3542 #ifdef PERL_CR_FILTER
3543     if (!PL_rsfp_filters) {
3544         filter_add(S_cr_textfilter,NULL);
3545     }
3546 #endif
3547     if (PL_rsfp_filters) {
3548         if (!append)
3549             SvCUR_set(sv, 0);   /* start with empty line        */
3550         if (FILTER_READ(0, sv, 0) > 0)
3551             return ( SvPVX(sv) ) ;
3552         else
3553             return NULL ;
3554     }
3555     else
3556         return (sv_gets(sv, PL_rsfp, append));
3557 }
3558
3559 STATIC HV *
3560 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
3561 {
3562     dVAR;
3563     GV *gv;
3564
3565     PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
3566
3567     if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
3568         return PL_curstash;
3569
3570     if (len > 2 &&
3571         (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
3572         (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
3573     {
3574         return GvHV(gv);                        /* Foo:: */
3575     }
3576
3577     /* use constant CLASS => 'MyClass' */
3578     gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
3579     if (gv && GvCV(gv)) {
3580         SV * const sv = cv_const_sv(GvCV(gv));
3581         if (sv)
3582             pkgname = SvPV_const(sv, len);
3583     }
3584
3585     return gv_stashpvn(pkgname, len, 0);
3586 }
3587
3588 /*
3589  * S_readpipe_override
3590  * Check whether readpipe() is overriden, and generates the appropriate
3591  * optree, provided sublex_start() is called afterwards.
3592  */
3593 STATIC void
3594 S_readpipe_override(pTHX)
3595 {
3596     GV **gvp;
3597     GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
3598     pl_yylval.ival = OP_BACKTICK;
3599     if ((gv_readpipe
3600                 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
3601             ||
3602             ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
3603              && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
3604              && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
3605     {
3606         PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
3607             append_elem(OP_LIST,
3608                 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
3609                 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
3610     }
3611 }
3612
3613 #ifdef PERL_MAD
3614  /*
3615  * Perl_madlex
3616  * The intent of this yylex wrapper is to minimize the changes to the
3617  * tokener when we aren't interested in collecting madprops.  It remains
3618  * to be seen how successful this strategy will be...
3619  */
3620
3621 int
3622 Perl_madlex(pTHX)
3623 {
3624     int optype;
3625     char *s = PL_bufptr;
3626
3627     /* make sure PL_thiswhite is initialized */
3628     PL_thiswhite = 0;
3629     PL_thismad = 0;
3630
3631     /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
3632     if (PL_pending_ident)
3633         return S_pending_ident(aTHX);
3634
3635     /* previous token ate up our whitespace? */
3636     if (!PL_lasttoke && PL_nextwhite) {
3637         PL_thiswhite = PL_nextwhite;
3638         PL_nextwhite = 0;
3639     }
3640
3641     /* isolate the token, and figure out where it is without whitespace */
3642     PL_realtokenstart = -1;
3643     PL_thistoken = 0;
3644     optype = yylex();
3645     s = PL_bufptr;
3646     assert(PL_curforce < 0);
3647
3648     if (!PL_thismad || PL_thismad->mad_key == '^') {    /* not forced already? */
3649         if (!PL_thistoken) {
3650             if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
3651                 PL_thistoken = newSVpvs("");
3652             else {
3653                 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
3654                 PL_thistoken = newSVpvn(tstart, s - tstart);
3655             }
3656         }
3657         if (PL_thismad) /* install head */
3658             CURMAD('X', PL_thistoken);
3659     }
3660
3661     /* last whitespace of a sublex? */
3662     if (optype == ')' && PL_endwhite) {
3663         CURMAD('X', PL_endwhite);
3664     }
3665
3666     if (!PL_thismad) {
3667
3668         /* if no whitespace and we're at EOF, bail.  Otherwise fake EOF below. */
3669         if (!PL_thiswhite && !PL_endwhite && !optype) {
3670             sv_free(PL_thistoken);
3671             PL_thistoken = 0;
3672             return 0;
3673         }
3674
3675         /* put off final whitespace till peg */
3676         if (optype == ';' && !PL_rsfp) {
3677             PL_nextwhite = PL_thiswhite;
3678             PL_thiswhite = 0;
3679         }
3680         else if (PL_thisopen) {
3681             CURMAD('q', PL_thisopen);
3682             if (PL_thistoken)
3683                 sv_free(PL_thistoken);
3684             PL_thistoken = 0;
3685         }
3686         else {
3687             /* Store actual token text as madprop X */
3688             CURMAD('X', PL_thistoken);
3689         }
3690
3691         if (PL_thiswhite) {
3692             /* add preceding whitespace as madprop _ */
3693             CURMAD('_', PL_thiswhite);
3694         }
3695
3696         if (PL_thisstuff) {
3697             /* add quoted material as madprop = */
3698             CURMAD('=', PL_thisstuff);
3699         }
3700
3701         if (PL_thisclose) {
3702             /* add terminating quote as madprop Q */
3703             CURMAD('Q', PL_thisclose);
3704         }
3705     }
3706
3707     /* special processing based on optype */
3708
3709     switch (optype) {
3710
3711     /* opval doesn't need a TOKEN since it can already store mp */
3712     case WORD:
3713     case METHOD:
3714     case FUNCMETH:
3715     case THING:
3716     case PMFUNC:
3717     case PRIVATEREF:
3718     case FUNC0SUB:
3719     case UNIOPSUB:
3720     case LSTOPSUB:
3721         if (pl_yylval.opval)
3722             append_madprops(PL_thismad, pl_yylval.opval, 0);
3723         PL_thismad = 0;
3724         return optype;
3725
3726     /* fake EOF */
3727     case 0:
3728         optype = PEG;
3729         if (PL_endwhite) {
3730             addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
3731             PL_endwhite = 0;
3732         }
3733         break;
3734
3735     case ']':
3736     case '}':
3737         if (PL_faketokens)
3738             break;
3739         /* remember any fake bracket that lexer is about to discard */
3740         if (PL_lex_brackets == 1 &&
3741             ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
3742         {
3743             s = PL_bufptr;
3744             while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3745                 s++;
3746             if (*s == '}') {
3747                 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
3748                 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3749                 PL_thiswhite = 0;
3750                 PL_bufptr = s - 1;
3751                 break;  /* don't bother looking for trailing comment */
3752             }
3753             else
3754                 s = PL_bufptr;
3755         }
3756         if (optype == ']')
3757             break;
3758         /* FALLTHROUGH */
3759
3760     /* attach a trailing comment to its statement instead of next token */
3761     case ';':
3762         if (PL_faketokens)
3763             break;
3764         if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
3765             s = PL_bufptr;
3766             while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3767                 s++;
3768             if (*s == '\n' || *s == '#') {
3769                 while (s < PL_bufend && *s != '\n')
3770                     s++;
3771                 if (s < PL_bufend)
3772                     s++;
3773                 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
3774                 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3775                 PL_thiswhite = 0;
3776                 PL_bufptr = s;
3777             }
3778         }
3779         break;
3780
3781     /* pval */
3782     case LABEL:
3783         break;
3784
3785     /* ival */
3786     default:
3787         break;
3788
3789     }
3790
3791     /* Create new token struct.  Note: opvals return early above. */
3792     pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
3793     PL_thismad = 0;
3794     return optype;
3795 }
3796 #endif
3797
3798 STATIC char *
3799 S_tokenize_use(pTHX_ int is_use, char *s) {
3800     dVAR;
3801
3802     PERL_ARGS_ASSERT_TOKENIZE_USE;
3803
3804     if (PL_expect != XSTATE)
3805         yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
3806                     is_use ? "use" : "no"));
3807     s = SKIPSPACE1(s);
3808     if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
3809         s = force_version(s, TRUE);
3810         if (*s == ';' || *s == '}'
3811                 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
3812             start_force(PL_curforce);
3813             NEXTVAL_NEXTTOKE.opval = NULL;
3814             force_next(WORD);
3815         }
3816         else if (*s == 'v') {
3817             s = force_word(s,WORD,FALSE,TRUE,FALSE);
3818             s = force_version(s, FALSE);
3819         }
3820     }
3821     else {
3822         s = force_word(s,WORD,FALSE,TRUE,FALSE);
3823         s = force_version(s, FALSE);
3824     }
3825     pl_yylval.ival = is_use;
3826     return s;
3827 }
3828 #ifdef DEBUGGING
3829     static const char* const exp_name[] =
3830         { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
3831           "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
3832         };
3833 #endif
3834
3835 /*
3836   yylex
3837
3838   Works out what to call the token just pulled out of the input
3839   stream.  The yacc parser takes care of taking the ops we return and
3840   stitching them into a tree.
3841
3842   Returns:
3843     PRIVATEREF
3844
3845   Structure:
3846       if read an identifier
3847           if we're in a my declaration
3848               croak if they tried to say my($foo::bar)
3849               build the ops for a my() declaration
3850           if it's an access to a my() variable
3851               are we in a sort block?
3852                   croak if my($a); $a <=> $b
3853               build ops for access to a my() variable
3854           if in a dq string, and they've said @foo and we can't find @foo
3855               croak
3856           build ops for a bareword
3857       if we already built the token before, use it.
3858 */
3859
3860
3861 #ifdef __SC__
3862 #pragma segment Perl_yylex
3863 #endif
3864 int
3865 Perl_yylex(pTHX)
3866 {
3867     dVAR;
3868     register char *s = PL_bufptr;
3869     register char *d;
3870     STRLEN len;
3871     bool bof = FALSE;
3872     U32 fake_eof = 0;
3873
3874     /* orig_keyword, gvp, and gv are initialized here because
3875      * jump to the label just_a_word_zero can bypass their
3876      * initialization later. */
3877     I32 orig_keyword = 0;
3878     GV *gv = NULL;
3879     GV **gvp = NULL;
3880
3881     DEBUG_T( {
3882         SV* tmp = newSVpvs("");
3883         PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
3884             (IV)CopLINE(PL_curcop),
3885             lex_state_names[PL_lex_state],
3886             exp_name[PL_expect],
3887             pv_display(tmp, s, strlen(s), 0, 60));
3888         SvREFCNT_dec(tmp);
3889     } );
3890     /* check if there's an identifier for us to look at */
3891     if (PL_pending_ident)
3892         return REPORT(S_pending_ident(aTHX));
3893
3894     /* no identifier pending identification */
3895
3896     switch (PL_lex_state) {
3897 #ifdef COMMENTARY
3898     case LEX_NORMAL:            /* Some compilers will produce faster */
3899     case LEX_INTERPNORMAL:      /* code if we comment these out. */
3900         break;
3901 #endif
3902
3903     /* when we've already built the next token, just pull it out of the queue */
3904     case LEX_KNOWNEXT:
3905 #ifdef PERL_MAD
3906         PL_lasttoke--;
3907         pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
3908         if (PL_madskills) {
3909             PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
3910             PL_nexttoke[PL_lasttoke].next_mad = 0;
3911             if (PL_thismad && PL_thismad->mad_key == '_') {
3912                 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
3913                 PL_thismad->mad_val = 0;
3914                 mad_free(PL_thismad);
3915                 PL_thismad = 0;
3916             }
3917         }
3918         if (!PL_lasttoke) {
3919             PL_lex_state = PL_lex_defer;
3920             PL_expect = PL_lex_expect;
3921             PL_lex_defer = LEX_NORMAL;
3922             if (!PL_nexttoke[PL_lasttoke].next_type)
3923                 return yylex();
3924         }
3925 #else
3926         PL_nexttoke--;
3927         pl_yylval = PL_nextval[PL_nexttoke];
3928         if (!PL_nexttoke) {
3929             PL_lex_state = PL_lex_defer;
3930             PL_expect = PL_lex_expect;
3931             PL_lex_defer = LEX_NORMAL;
3932         }
3933 #endif
3934 #ifdef PERL_MAD
3935         /* FIXME - can these be merged?  */
3936         return(PL_nexttoke[PL_lasttoke].next_type);
3937 #else
3938         return REPORT(PL_nexttype[PL_nexttoke]);
3939 #endif
3940
3941     /* interpolated case modifiers like \L \U, including \Q and \E.
3942        when we get here, PL_bufptr is at the \
3943     */
3944     case LEX_INTERPCASEMOD:
3945 #ifdef DEBUGGING
3946         if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
3947             Perl_croak(aTHX_ "panic: INTERPCASEMOD");
3948 #endif
3949         /* handle \E or end of string */
3950         if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
3951             /* if at a \E */
3952             if (PL_lex_casemods) {
3953                 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3954                 PL_lex_casestack[PL_lex_casemods] = '\0';
3955
3956                 if (PL_bufptr != PL_bufend
3957                     && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3958                     PL_bufptr += 2;
3959                     PL_lex_state = LEX_INTERPCONCAT;
3960 #ifdef PERL_MAD
3961                     if (PL_madskills)
3962                         PL_thistoken = newSVpvs("\\E");
3963 #endif
3964                 }
3965                 return REPORT(')');
3966             }
3967 #ifdef PERL_MAD
3968             while (PL_bufptr != PL_bufend &&
3969               PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
3970                 if (!PL_thiswhite)
3971                     PL_thiswhite = newSVpvs("");
3972                 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
3973                 PL_bufptr += 2;
3974             }
3975 #else
3976             if (PL_bufptr != PL_bufend)
3977                 PL_bufptr += 2;
3978 #endif
3979             PL_lex_state = LEX_INTERPCONCAT;
3980             return yylex();
3981         }
3982         else {
3983             DEBUG_T({ PerlIO_printf(Perl_debug_log,
3984               "### Saw case modifier\n"); });
3985             s = PL_bufptr + 1;
3986             if (s[1] == '\\' && s[2] == 'E') {
3987 #ifdef PERL_MAD
3988                 if (!PL_thiswhite)
3989                     PL_thiswhite = newSVpvs("");
3990                 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
3991 #endif
3992                 PL_bufptr = s + 3;
3993                 PL_lex_state = LEX_INTERPCONCAT;
3994                 return yylex();
3995             }
3996             else {
3997                 I32 tmp;
3998                 if (!PL_madskills) /* when just compiling don't need correct */
3999                     if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4000                         tmp = *s, *s = s[2], s[2] = (char)tmp;  /* misordered... */
4001                 if ((*s == 'L' || *s == 'U') &&
4002                     (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
4003                     PL_lex_casestack[--PL_lex_casemods] = '\0';
4004                     return REPORT(')');
4005                 }
4006                 if (PL_lex_casemods > 10)
4007                     Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4008                 PL_lex_casestack[PL_lex_casemods++] = *s;
4009                 PL_lex_casestack[PL_lex_casemods] = '\0';
4010                 PL_lex_state = LEX_INTERPCONCAT;
4011                 start_force(PL_curforce);
4012                 NEXTVAL_NEXTTOKE.ival = 0;
4013                 force_next('(');
4014                 start_force(PL_curforce);
4015                 if (*s == 'l')
4016                     NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4017                 else if (*s == 'u')
4018                     NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4019                 else if (*s == 'L')
4020                     NEXTVAL_NEXTTOKE.ival = OP_LC;
4021                 else if (*s == 'U')
4022                     NEXTVAL_NEXTTOKE.ival = OP_UC;
4023                 else if (*s == 'Q')
4024                     NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4025                 else
4026                     Perl_croak(aTHX_ "panic: yylex");
4027                 if (PL_madskills) {
4028                     SV* const tmpsv = newSVpvs("\\ ");
4029                     /* replace the space with the character we want to escape
4030                      */
4031                     SvPVX(tmpsv)[1] = *s;
4032                     curmad('_', tmpsv);
4033                 }
4034                 PL_bufptr = s + 1;
4035             }
4036             force_next(FUNC);
4037             if (PL_lex_starts) {
4038                 s = PL_bufptr;
4039                 PL_lex_starts = 0;
4040 #ifdef PERL_MAD
4041                 if (PL_madskills) {
4042                     if (PL_thistoken)
4043                         sv_free(PL_thistoken);
4044                     PL_thistoken = newSVpvs("");
4045                 }
4046 #endif
4047                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4048                 if (PL_lex_casemods == 1 && PL_lex_inpat)
4049                     OPERATOR(',');
4050                 else
4051                     Aop(OP_CONCAT);
4052             }
4053             else
4054                 return yylex();
4055         }
4056
4057     case LEX_INTERPPUSH:
4058         return REPORT(sublex_push());
4059
4060     case LEX_INTERPSTART:
4061         if (PL_bufptr == PL_bufend)
4062             return REPORT(sublex_done());
4063         DEBUG_T({ PerlIO_printf(Perl_debug_log,
4064               "### Interpolated variable\n"); });
4065         PL_expect = XTERM;
4066         PL_lex_dojoin = (*PL_bufptr == '@');
4067         PL_lex_state = LEX_INTERPNORMAL;
4068         if (PL_lex_dojoin) {
4069             start_force(PL_curforce);
4070             NEXTVAL_NEXTTOKE.ival = 0;
4071             force_next(',');
4072             start_force(PL_curforce);
4073             force_ident("\"", '$');
4074             start_force(PL_curforce);
4075             NEXTVAL_NEXTTOKE.ival = 0;
4076             force_next('$');
4077             start_force(PL_curforce);
4078             NEXTVAL_NEXTTOKE.ival = 0;
4079             force_next('(');
4080             start_force(PL_curforce);
4081             NEXTVAL_NEXTTOKE.ival = OP_JOIN;    /* emulate join($", ...) */
4082             force_next(FUNC);
4083         }
4084         if (PL_lex_starts++) {
4085             s = PL_bufptr;
4086 #ifdef PERL_MAD
4087             if (PL_madskills) {
4088                 if (PL_thistoken)
4089                     sv_free(PL_thistoken);
4090                 PL_thistoken = newSVpvs("");
4091             }
4092 #endif
4093             /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4094             if (!PL_lex_casemods && PL_lex_inpat)
4095                 OPERATOR(',');
4096             else
4097                 Aop(OP_CONCAT);
4098         }
4099         return yylex();
4100
4101     case LEX_INTERPENDMAYBE:
4102         if (intuit_more(PL_bufptr)) {
4103             PL_lex_state = LEX_INTERPNORMAL;    /* false alarm, more expr */
4104             break;
4105         }
4106         /* FALL THROUGH */
4107
4108     case LEX_INTERPEND:
4109         if (PL_lex_dojoin) {
4110             PL_lex_dojoin = FALSE;
4111             PL_lex_state = LEX_INTERPCONCAT;
4112 #ifdef PERL_MAD
4113             if (PL_madskills) {
4114                 if (PL_thistoken)
4115                     sv_free(PL_thistoken);
4116                 PL_thistoken = newSVpvs("");
4117             }
4118 #endif
4119             return REPORT(')');
4120         }
4121         if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
4122             && SvEVALED(PL_lex_repl))
4123         {
4124             if (PL_bufptr != PL_bufend)
4125                 Perl_croak(aTHX_ "Bad evalled substitution pattern");
4126             PL_lex_repl = NULL;
4127         }
4128         /* FALLTHROUGH */
4129     case LEX_INTERPCONCAT:
4130 #ifdef DEBUGGING
4131         if (PL_lex_brackets)
4132             Perl_croak(aTHX_ "panic: INTERPCONCAT");
4133 #endif
4134         if (PL_bufptr == PL_bufend)
4135             return REPORT(sublex_done());
4136
4137         if (SvIVX(PL_linestr) == '\'') {
4138             SV *sv = newSVsv(PL_linestr);
4139             if (!PL_lex_inpat)
4140                 sv = tokeq(sv);
4141             else if ( PL_hints & HINT_NEW_RE )
4142                 sv = new_constant(NULL, 0, "qr", sv, sv, "q", 1);
4143             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
4144             s = PL_bufend;
4145         }
4146         else {
4147             s = scan_const(PL_bufptr);
4148             if (*s == '\\')
4149                 PL_lex_state = LEX_INTERPCASEMOD;
4150             else
4151                 PL_lex_state = LEX_INTERPSTART;
4152         }
4153
4154         if (s != PL_bufptr) {
4155             start_force(PL_curforce);
4156             if (PL_madskills) {
4157                 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
4158             }
4159             NEXTVAL_NEXTTOKE = pl_yylval;
4160             PL_expect = XTERM;
4161             force_next(THING);
4162             if (PL_lex_starts++) {
4163 #ifdef PERL_MAD
4164                 if (PL_madskills) {
4165                     if (PL_thistoken)
4166                         sv_free(PL_thistoken);
4167                     PL_thistoken = newSVpvs("");
4168                 }
4169 #endif
4170                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4171                 if (!PL_lex_casemods && PL_lex_inpat)
4172                     OPERATOR(',');
4173                 else
4174                     Aop(OP_CONCAT);
4175             }
4176             else {
4177                 PL_bufptr = s;
4178                 return yylex();
4179             }
4180         }
4181
4182         return yylex();
4183     case LEX_FORMLINE:
4184         PL_lex_state = LEX_NORMAL;
4185         s = scan_formline(PL_bufptr);
4186         if (!PL_lex_formbrack)
4187             goto rightbracket;
4188         OPERATOR(';');
4189     }
4190
4191     s = PL_bufptr;
4192     PL_oldoldbufptr = PL_oldbufptr;
4193     PL_oldbufptr = s;
4194
4195   retry:
4196 #ifdef PERL_MAD
4197     if (PL_thistoken) {
4198         sv_free(PL_thistoken);
4199         PL_thistoken = 0;
4200     }
4201     PL_realtokenstart = s - SvPVX(PL_linestr);  /* assume but undo on ws */
4202 #endif
4203     switch (*s) {
4204     default:
4205         if (isIDFIRST_lazy_if(s,UTF))
4206             goto keylookup;
4207         {
4208         unsigned char c = *s;
4209         len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
4210         if (len > UNRECOGNIZED_PRECEDE_COUNT) {
4211             d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
4212         } else {
4213             d = PL_linestart;
4214         }
4215         *s = '\0';
4216         Perl_croak(aTHX_ "Unrecognized character \\x%02X; marked by <-- HERE after %s<-- HERE near column %d", c, d, (int) len + 1);
4217     }
4218     case 4:
4219     case 26:
4220         goto fake_eof;                  /* emulate EOF on ^D or ^Z */
4221     case 0:
4222 #ifdef PERL_MAD
4223         if (PL_madskills)
4224             PL_faketokens = 0;
4225 #endif
4226         if (!PL_rsfp) {
4227             PL_last_uni = 0;
4228             PL_last_lop = 0;
4229             if (PL_lex_brackets) {
4230                 yyerror((const char *)
4231                         (PL_lex_formbrack
4232                          ? "Format not terminated"
4233                          : "Missing right curly or square bracket"));
4234             }
4235             DEBUG_T( { PerlIO_printf(Perl_debug_log,
4236                         "### Tokener got EOF\n");
4237             } );
4238             TOKEN(0);
4239         }
4240         if (s++ < PL_bufend)
4241             goto retry;                 /* ignore stray nulls */
4242         PL_last_uni = 0;
4243         PL_last_lop = 0;
4244         if (!PL_in_eval && !PL_preambled) {
4245             PL_preambled = TRUE;
4246 #ifdef PERL_MAD
4247             if (PL_madskills)
4248                 PL_faketokens = 1;
4249 #endif
4250             if (PL_perldb) {
4251                 /* Generate a string of Perl code to load the debugger.
4252                  * If PERL5DB is set, it will return the contents of that,
4253                  * otherwise a compile-time require of perl5db.pl.  */
4254
4255                 const char * const pdb = PerlEnv_getenv("PERL5DB");
4256
4257                 if (pdb) {
4258                     sv_setpv(PL_linestr, pdb);
4259                     sv_catpvs(PL_linestr,";");
4260                 } else {
4261                     SETERRNO(0,SS_NORMAL);
4262                     sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
4263                 }
4264             } else
4265                 sv_setpvs(PL_linestr,"");
4266             if (PL_preambleav) {
4267                 SV **svp = AvARRAY(PL_preambleav);
4268                 SV **const end = svp + AvFILLp(PL_preambleav);
4269                 while(svp <= end) {
4270                     sv_catsv(PL_linestr, *svp);
4271                     ++svp;
4272                     sv_catpvs(PL_linestr, ";");
4273                 }
4274                 sv_free(MUTABLE_SV(PL_preambleav));
4275                 PL_preambleav = NULL;
4276             }
4277             if (PL_minus_E)
4278                 sv_catpvs(PL_linestr,
4279                           "use feature ':5." STRINGIFY(PERL_VERSION) "';");
4280             if (PL_minus_n || PL_minus_p) {
4281                 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
4282                 if (PL_minus_l)
4283                     sv_catpvs(PL_linestr,"chomp;");
4284                 if (PL_minus_a) {
4285                     if (PL_minus_F) {
4286                         if ((*PL_splitstr == '/' || *PL_splitstr == '\''
4287                              || *PL_splitstr == '"')
4288                               && strchr(PL_splitstr + 1, *PL_splitstr))
4289                             Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
4290                         else {
4291                             /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
4292                                bytes can be used as quoting characters.  :-) */
4293                             const char *splits = PL_splitstr;
4294                             sv_catpvs(PL_linestr, "our @F=split(q\0");
4295                             do {
4296                                 /* Need to \ \s  */
4297                                 if (*splits == '\\')
4298                                     sv_catpvn(PL_linestr, splits, 1);
4299                                 sv_catpvn(PL_linestr, splits, 1);
4300                             } while (*splits++);
4301                             /* This loop will embed the trailing NUL of
4302                                PL_linestr as the last thing it does before
4303                                terminating.  */
4304                             sv_catpvs(PL_linestr, ");");
4305                         }
4306                     }
4307                     else
4308                         sv_catpvs(PL_linestr,"our @F=split(' ');");
4309                 }
4310             }
4311             sv_catpvs(PL_linestr, "\n");
4312             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4313             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4314             PL_last_lop = PL_last_uni = NULL;
4315             if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
4316                 update_debugger_info(PL_linestr, NULL, 0);
4317             goto retry;
4318         }
4319         do {
4320             fake_eof = 0;
4321             bof = PL_rsfp ? TRUE : FALSE;
4322             if (0) {
4323               fake_eof:
4324                 fake_eof = LEX_FAKE_EOF;
4325             }
4326             PL_bufptr = PL_bufend;
4327             CopLINE_inc(PL_curcop);
4328             if (!lex_next_chunk(fake_eof)) {
4329                 CopLINE_dec(PL_curcop);
4330                 s = PL_bufptr;
4331                 TOKEN(';');     /* not infinite loop because rsfp is NULL now */
4332             }
4333             CopLINE_dec(PL_curcop);
4334 #ifdef PERL_MAD
4335             if (!PL_rsfp)
4336                 PL_realtokenstart = -1;
4337 #endif
4338             s = PL_bufptr;
4339             /* If it looks like the start of a BOM or raw UTF-16,
4340              * check if it in fact is. */
4341             if (bof && PL_rsfp &&
4342                      (*s == 0 ||
4343                       *(U8*)s == 0xEF ||
4344                       *(U8*)s >= 0xFE ||
4345                       s[1] == 0)) {
4346                 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
4347                 if (bof) {
4348                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4349                     s = swallow_bom((U8*)s);
4350                 }
4351             }
4352             if (PL_doextract) {
4353                 /* Incest with pod. */
4354 #ifdef PERL_MAD
4355                 if (PL_madskills)
4356                     sv_catsv(PL_thiswhite, PL_linestr);
4357 #endif
4358                 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
4359                     sv_setpvs(PL_linestr, "");
4360                     PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4361                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4362                     PL_last_lop = PL_last_uni = NULL;
4363                     PL_doextract = FALSE;
4364                 }
4365             }
4366             incline(s);
4367         } while (PL_doextract);
4368         PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
4369         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4370         PL_last_lop = PL_last_uni = NULL;
4371         if (CopLINE(PL_curcop) == 1) {
4372             while (s < PL_bufend && isSPACE(*s))
4373                 s++;
4374             if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
4375                 s++;
4376 #ifdef PERL_MAD
4377             if (PL_madskills)
4378                 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
4379 #endif
4380             d = NULL;
4381             if (!PL_in_eval) {
4382                 if (*s == '#' && *(s+1) == '!')
4383                     d = s + 2;
4384 #ifdef ALTERNATE_SHEBANG
4385                 else {
4386                     static char const as[] = ALTERNATE_SHEBANG;
4387                     if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
4388                         d = s + (sizeof(as) - 1);
4389                 }
4390 #endif /* ALTERNATE_SHEBANG */
4391             }
4392             if (d) {
4393                 char *ipath;
4394                 char *ipathend;
4395
4396                 while (isSPACE(*d))
4397                     d++;
4398                 ipath = d;
4399                 while (*d && !isSPACE(*d))
4400                     d++;
4401                 ipathend = d;
4402
4403 #ifdef ARG_ZERO_IS_SCRIPT
4404                 if (ipathend > ipath) {
4405                     /*
4406                      * HP-UX (at least) sets argv[0] to the script name,
4407                      * which makes $^X incorrect.  And Digital UNIX and Linux,
4408                      * at least, set argv[0] to the basename of the Perl
4409                      * interpreter. So, having found "#!", we'll set it right.
4410                      */
4411                     SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
4412                                                     SVt_PV)); /* $^X */
4413                     assert(SvPOK(x) || SvGMAGICAL(x));
4414                     if (sv_eq(x, CopFILESV(PL_curcop))) {
4415                         sv_setpvn(x, ipath, ipathend - ipath);
4416                         SvSETMAGIC(x);
4417                     }
4418                     else {
4419                         STRLEN blen;
4420                         STRLEN llen;
4421                         const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
4422                         const char * const lstart = SvPV_const(x,llen);
4423                         if (llen < blen) {
4424                             bstart += blen - llen;
4425                             if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
4426                                 sv_setpvn(x, ipath, ipathend - ipath);
4427                                 SvSETMAGIC(x);
4428                             }
4429                         }
4430                     }
4431                     TAINT_NOT;  /* $^X is always tainted, but that's OK */
4432                 }
4433 #endif /* ARG_ZERO_IS_SCRIPT */
4434
4435                 /*
4436                  * Look for options.
4437                  */
4438                 d = instr(s,"perl -");
4439                 if (!d) {
4440                     d = instr(s,"perl");
4441 #if defined(DOSISH)
4442                     /* avoid getting into infinite loops when shebang
4443                      * line contains "Perl" rather than "perl" */
4444                     if (!d) {
4445                         for (d = ipathend-4; d >= ipath; --d) {
4446                             if ((*d == 'p' || *d == 'P')
4447                                 && !ibcmp(d, "perl", 4))
4448                             {
4449                                 break;
4450                             }
4451                         }
4452                         if (d < ipath)
4453                             d = NULL;
4454                     }
4455 #endif
4456                 }
4457 #ifdef ALTERNATE_SHEBANG
4458                 /*
4459                  * If the ALTERNATE_SHEBANG on this system starts with a
4460                  * character that can be part of a Perl expression, then if
4461                  * we see it but not "perl", we're probably looking at the
4462                  * start of Perl code, not a request to hand off to some
4463                  * other interpreter.  Similarly, if "perl" is there, but
4464                  * not in the first 'word' of the line, we assume the line
4465                  * contains the start of the Perl program.
4466                  */
4467                 if (d && *s != '#') {
4468                     const char *c = ipath;
4469                     while (*c && !strchr("; \t\r\n\f\v#", *c))
4470                         c++;
4471                     if (c < d)
4472                         d = NULL;       /* "perl" not in first word; ignore */
4473                     else
4474                         *s = '#';       /* Don't try to parse shebang line */
4475                 }
4476 #endif /* ALTERNATE_SHEBANG */
4477                 if (!d &&
4478                     *s == '#' &&
4479                     ipathend > ipath &&
4480                     !PL_minus_c &&
4481                     !instr(s,"indir") &&
4482                     instr(PL_origargv[0],"perl"))
4483                 {
4484                     dVAR;
4485                     char **newargv;
4486
4487                     *ipathend = '\0';
4488                     s = ipathend + 1;
4489                     while (s < PL_bufend && isSPACE(*s))
4490                         s++;
4491                     if (s < PL_bufend) {
4492                         Newx(newargv,PL_origargc+3,char*);
4493                         newargv[1] = s;
4494                         while (s < PL_bufend && !isSPACE(*s))
4495                             s++;
4496                         *s = '\0';
4497                         Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
4498                     }
4499                     else
4500                         newargv = PL_origargv;
4501                     newargv[0] = ipath;
4502                     PERL_FPU_PRE_EXEC
4503                     PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
4504                     PERL_FPU_POST_EXEC
4505                     Perl_croak(aTHX_ "Can't exec %s", ipath);
4506                 }
4507                 if (d) {
4508                     while (*d && !isSPACE(*d))
4509                         d++;
4510                     while (SPACE_OR_TAB(*d))
4511                         d++;
4512
4513                     if (*d++ == '-') {
4514                         const bool switches_done = PL_doswitches;
4515                         const U32 oldpdb = PL_perldb;
4516                         const bool oldn = PL_minus_n;
4517                         const bool oldp = PL_minus_p;
4518                         const char *d1 = d;
4519
4520                         do {
4521                             bool baduni = FALSE;
4522                             if (*d1 == 'C') {
4523                                 const char *d2 = d1 + 1;
4524                                 if (parse_unicode_opts((const char **)&d2)
4525                                     != PL_unicode)
4526                                     baduni = TRUE;
4527                             }
4528                             if (baduni || *d1 == 'M' || *d1 == 'm') {
4529                                 const char * const m = d1;
4530                                 while (*d1 && !isSPACE(*d1))
4531                                     d1++;
4532                                 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
4533                                       (int)(d1 - m), m);
4534                             }
4535                             d1 = moreswitches(d1);
4536                         } while (d1);
4537                         if (PL_doswitches && !switches_done) {
4538                             int argc = PL_origargc;
4539                             char **argv = PL_origargv;
4540                             do {
4541                                 argc--,argv++;
4542                             } while (argc && argv[0][0] == '-' && argv[0][1]);
4543                             init_argv_symbols(argc,argv);
4544                         }
4545                         if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
4546                             ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
4547                               /* if we have already added "LINE: while (<>) {",
4548                                  we must not do it again */
4549                         {
4550                             sv_setpvs(PL_linestr, "");
4551                             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4552                             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4553                             PL_last_lop = PL_last_uni = NULL;
4554                             PL_preambled = FALSE;
4555                             if (PERLDB_LINE || PERLDB_SAVESRC)
4556                                 (void)gv_fetchfile(PL_origfilename);
4557                             goto retry;
4558                         }
4559                     }
4560                 }
4561             }
4562         }
4563         if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4564             PL_bufptr = s;
4565             PL_lex_state = LEX_FORMLINE;
4566             return yylex();
4567         }
4568         goto retry;
4569     case '\r':
4570 #ifdef PERL_STRICT_CR
4571         Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4572         Perl_croak(aTHX_
4573       "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
4574 #endif
4575     case ' ': case '\t': case '\f': case 013:
4576 #ifdef PERL_MAD
4577         PL_realtokenstart = -1;
4578         if (!PL_thiswhite)
4579             PL_thiswhite = newSVpvs("");
4580         sv_catpvn(PL_thiswhite, s, 1);
4581 #endif
4582         s++;
4583         goto retry;
4584     case '#':
4585     case '\n':
4586 #ifdef PERL_MAD
4587         PL_realtokenstart = -1;
4588         if (PL_madskills)
4589             PL_faketokens = 0;
4590 #endif
4591         if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
4592             if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
4593                 /* handle eval qq[#line 1 "foo"\n ...] */
4594                 CopLINE_dec(PL_curcop);
4595                 incline(s);
4596             }
4597             if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
4598                 s = SKIPSPACE0(s);
4599                 if (!PL_in_eval || PL_rsfp)
4600                     incline(s);
4601             }
4602             else {
4603                 d = s;
4604                 while (d < PL_bufend && *d != '\n')
4605                     d++;
4606                 if (d < PL_bufend)
4607                     d++;
4608                 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
4609                   Perl_croak(aTHX_ "panic: input overflow");
4610 #ifdef PERL_MAD
4611                 if (PL_madskills)
4612                     PL_thiswhite = newSVpvn(s, d - s);
4613 #endif
4614                 s = d;
4615                 incline(s);
4616             }
4617             if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4618                 PL_bufptr = s;
4619                 PL_lex_state = LEX_FORMLINE;
4620                 return yylex();
4621             }
4622         }
4623         else {
4624 #ifdef PERL_MAD
4625             if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
4626                 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
4627                     PL_faketokens = 0;
4628                     s = SKIPSPACE0(s);
4629                     TOKEN(PEG); /* make sure any #! line is accessible */
4630                 }
4631                 s = SKIPSPACE0(s);
4632             }
4633             else {
4634 /*              if (PL_madskills && PL_lex_formbrack) { */
4635                     d = s;
4636                     while (d < PL_bufend && *d != '\n')
4637                         d++;
4638                     if (d < PL_bufend)
4639                         d++;
4640                     else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
4641                       Perl_croak(aTHX_ "panic: input overflow");
4642                     if (PL_madskills && CopLINE(PL_curcop) >= 1) {
4643                         if (!PL_thiswhite)
4644                             PL_thiswhite = newSVpvs("");
4645                         if (CopLINE(PL_curcop) == 1) {
4646                             sv_setpvs(PL_thiswhite, "");
4647                             PL_faketokens = 0;
4648                         }
4649                         sv_catpvn(PL_thiswhite, s, d - s);
4650                     }
4651                     s = d;
4652 /*              }
4653                 *s = '\0';
4654                 PL_bufend = s; */
4655             }
4656 #else
4657             *s = '\0';
4658             PL_bufend = s;
4659 #endif
4660         }
4661         goto retry;
4662     case '-':
4663         if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
4664             I32 ftst = 0;
4665             char tmp;
4666
4667             s++;
4668             PL_bufptr = s;
4669             tmp = *s++;
4670
4671             while (s < PL_bufend && SPACE_OR_TAB(*s))
4672                 s++;
4673
4674             if (strnEQ(s,"=>",2)) {
4675                 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
4676                 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
4677                 OPERATOR('-');          /* unary minus */
4678             }
4679             PL_last_uni = PL_oldbufptr;
4680             switch (tmp) {
4681             case 'r': ftst = OP_FTEREAD;        break;
4682             case 'w': ftst = OP_FTEWRITE;       break;
4683             case 'x': ftst = OP_FTEEXEC;        break;
4684             case 'o': ftst = OP_FTEOWNED;       break;
4685             case 'R': ftst = OP_FTRREAD;        break;
4686             case 'W': ftst = OP_FTRWRITE;       break;
4687             case 'X': ftst = OP_FTREXEC;        break;
4688             case 'O': ftst = OP_FTROWNED;       break;
4689             case 'e': ftst = OP_FTIS;           break;
4690             case 'z': ftst = OP_FTZERO;         break;
4691             case 's': ftst = OP_FTSIZE;         break;
4692             case 'f': ftst = OP_FTFILE;         break;
4693             case 'd': ftst = OP_FTDIR;          break;
4694             case 'l': ftst = OP_FTLINK;         break;
4695             case 'p': ftst = OP_FTPIPE;         break;
4696             case 'S': ftst = OP_FTSOCK;         break;
4697             case 'u': ftst = OP_FTSUID;         break;
4698             case 'g': ftst = OP_FTSGID;         break;
4699             case 'k': ftst = OP_FTSVTX;         break;
4700             case 'b': ftst = OP_FTBLK;          break;
4701             case 'c': ftst = OP_FTCHR;          break;
4702             case 't': ftst = OP_FTTTY;          break;
4703             case 'T': ftst = OP_FTTEXT;         break;
4704             case 'B': ftst = OP_FTBINARY;       break;
4705             case 'M': case 'A': case 'C':
4706                 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
4707                 switch (tmp) {
4708                 case 'M': ftst = OP_FTMTIME;    break;
4709                 case 'A': ftst = OP_FTATIME;    break;
4710                 case 'C': ftst = OP_FTCTIME;    break;
4711                 default:                        break;
4712                 }
4713                 break;
4714             default:
4715                 break;
4716             }
4717             if (ftst) {
4718                 PL_last_lop_op = (OPCODE)ftst;
4719                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
4720                         "### Saw file test %c\n", (int)tmp);
4721                 } );
4722                 FTST(ftst);
4723             }
4724             else {
4725                 /* Assume it was a minus followed by a one-letter named
4726                  * subroutine call (or a -bareword), then. */
4727                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
4728                         "### '-%c' looked like a file test but was not\n",
4729                         (int) tmp);
4730                 } );
4731                 s = --PL_bufptr;
4732             }
4733         }
4734         {
4735             const char tmp = *s++;
4736             if (*s == tmp) {
4737                 s++;
4738                 if (PL_expect == XOPERATOR)
4739                     TERM(POSTDEC);
4740                 else
4741                     OPERATOR(PREDEC);
4742             }
4743             else if (*s == '>') {
4744                 s++;
4745                 s = SKIPSPACE1(s);
4746                 if (isIDFIRST_lazy_if(s,UTF)) {
4747                     s = force_word(s,METHOD,FALSE,TRUE,FALSE);
4748                     TOKEN(ARROW);
4749                 }
4750                 else if (*s == '$')
4751                     OPERATOR(ARROW);
4752                 else
4753                     TERM(ARROW);
4754             }
4755             if (PL_expect == XOPERATOR)
4756                 Aop(OP_SUBTRACT);
4757             else {
4758                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
4759                     check_uni();
4760                 OPERATOR('-');          /* unary minus */
4761             }
4762         }
4763
4764     case '+':
4765         {
4766             const char tmp = *s++;
4767             if (*s == tmp) {
4768                 s++;
4769                 if (PL_expect == XOPERATOR)
4770                     TERM(POSTINC);
4771                 else
4772                     OPERATOR(PREINC);
4773             }
4774             if (PL_expect == XOPERATOR)
4775                 Aop(OP_ADD);
4776             else {
4777                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
4778                     check_uni();
4779                 OPERATOR('+');
4780             }
4781         }
4782
4783     case '*':
4784         if (PL_expect != XOPERATOR) {
4785             s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
4786             PL_expect = XOPERATOR;
4787             force_ident(PL_tokenbuf, '*');
4788             if (!*PL_tokenbuf)
4789                 PREREF('*');
4790             TERM('*');
4791         }
4792         s++;
4793         if (*s == '*') {
4794             s++;
4795             PWop(OP_POW);
4796         }
4797         Mop(OP_MULTIPLY);
4798
4799     case '%':
4800         if (PL_expect == XOPERATOR) {
4801             ++s;
4802             Mop(OP_MODULO);
4803         }
4804         PL_tokenbuf[0] = '%';
4805         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
4806                 sizeof PL_tokenbuf - 1, FALSE);
4807         if (!PL_tokenbuf[1]) {
4808             PREREF('%');
4809         }
4810         PL_pending_ident = '%';
4811         TERM('%');
4812
4813     case '^':
4814         s++;
4815         BOop(OP_BIT_XOR);
4816     case '[':
4817         PL_lex_brackets++;
4818         {
4819             const char tmp = *s++;
4820             OPERATOR(tmp);
4821         }
4822     case '~':
4823         if (s[1] == '~'
4824             && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
4825         {
4826             s += 2;
4827             Eop(OP_SMARTMATCH);
4828         }
4829     case ',':
4830         {
4831             const char tmp = *s++;
4832             OPERATOR(tmp);
4833         }
4834     case ':':
4835         if (s[1] == ':') {
4836             len = 0;
4837             goto just_a_word_zero_gv;
4838         }
4839         s++;
4840         switch (PL_expect) {
4841             OP *attrs;
4842 #ifdef PERL_MAD
4843             I32 stuffstart;
4844 #endif
4845         case XOPERATOR:
4846             if (!PL_in_my || PL_lex_state != LEX_NORMAL)
4847                 break;
4848             PL_bufptr = s;      /* update in case we back off */
4849             if (*s == '=') {
4850                 deprecate(":= for an empty attribute list");
4851             }
4852             goto grabattrs;
4853         case XATTRBLOCK:
4854             PL_expect = XBLOCK;
4855             goto grabattrs;
4856         case XATTRTERM:
4857             PL_expect = XTERMBLOCK;
4858          grabattrs:
4859 #ifdef PERL_MAD
4860             stuffstart = s - SvPVX(PL_linestr) - 1;
4861 #endif
4862             s = PEEKSPACE(s);
4863             attrs = NULL;
4864             while (isIDFIRST_lazy_if(s,UTF)) {
4865                 I32 tmp;
4866                 SV *sv;
4867                 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
4868                 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
4869                     if (tmp < 0) tmp = -tmp;
4870                     switch (tmp) {
4871                     case KEY_or:
4872                     case KEY_and:
4873                     case KEY_for:
4874                     case KEY_foreach:
4875                     case KEY_unless:
4876                     case KEY_if:
4877                     case KEY_while:
4878                     case KEY_until:
4879                         goto got_attrs;
4880                     default:
4881                         break;
4882                     }
4883                 }
4884                 sv = newSVpvn(s, len);
4885                 if (*d == '(') {
4886                     d = scan_str(d,TRUE,TRUE);
4887                     if (!d) {
4888                         /* MUST advance bufptr here to avoid bogus
4889                            "at end of line" context messages from yyerror().
4890                          */
4891                         PL_bufptr = s + len;
4892                         yyerror("Unterminated attribute parameter in attribute list");
4893                         if (attrs)
4894                             op_free(attrs);
4895                         sv_free(sv);
4896                         return REPORT(0);       /* EOF indicator */
4897                     }
4898                 }
4899                 if (PL_lex_stuff) {
4900                     sv_catsv(sv, PL_lex_stuff);
4901                     attrs = append_elem(OP_LIST, attrs,
4902                                         newSVOP(OP_CONST, 0, sv));
4903                     SvREFCNT_dec(PL_lex_stuff);
4904                     PL_lex_stuff = NULL;
4905                 }
4906                 else {
4907                     if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
4908                         sv_free(sv);
4909                         if (PL_in_my == KEY_our) {
4910                             deprecate(":unique");
4911                         }
4912                         else
4913                             Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
4914                     }
4915
4916                     /* NOTE: any CV attrs applied here need to be part of
4917                        the CVf_BUILTIN_ATTRS define in cv.h! */
4918                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
4919                         sv_free(sv);
4920                         CvLVALUE_on(PL_compcv);
4921                     }
4922                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
4923                         sv_free(sv);
4924                         deprecate(":locked");
4925                     }
4926                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
4927                         sv_free(sv);
4928                         CvMETHOD_on(PL_compcv);
4929                     }
4930                     /* After we've set the flags, it could be argued that
4931                        we don't need to do the attributes.pm-based setting
4932                        process, and shouldn't bother appending recognized
4933                        flags.  To experiment with that, uncomment the
4934                        following "else".  (Note that's already been
4935                        uncommented.  That keeps the above-applied built-in
4936                        attributes from being intercepted (and possibly
4937                        rejected) by a package's attribute routines, but is
4938                        justified by the performance win for the common case
4939                        of applying only built-in attributes.) */
4940                     else
4941                         attrs = append_elem(OP_LIST, attrs,
4942                                             newSVOP(OP_CONST, 0,
4943                                                     sv));
4944                 }
4945                 s = PEEKSPACE(d);
4946                 if (*s == ':' && s[1] != ':')
4947                     s = PEEKSPACE(s+1);
4948                 else if (s == d)
4949                     break;      /* require real whitespace or :'s */
4950                 /* XXX losing whitespace on sequential attributes here */
4951             }
4952             {
4953                 const char tmp
4954                     = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
4955                 if (*s != ';' && *s != '}' && *s != tmp
4956                     && (tmp != '=' || *s != ')')) {
4957                     const char q = ((*s == '\'') ? '"' : '\'');
4958                     /* If here for an expression, and parsed no attrs, back
4959                        off. */
4960                     if (tmp == '=' && !attrs) {
4961                         s = PL_bufptr;
4962                         break;
4963                     }
4964                     /* MUST advance bufptr here to avoid bogus "at end of line"
4965                        context messages from yyerror().
4966                     */
4967                     PL_bufptr = s;
4968                     yyerror( (const char *)
4969                              (*s
4970                               ? Perl_form(aTHX_ "Invalid separator character "
4971                                           "%c%c%c in attribute list", q, *s, q)
4972                               : "Unterminated attribute list" ) );
4973                     if (attrs)
4974                         op_free(attrs);
4975                     OPERATOR(':');
4976                 }
4977             }
4978         got_attrs:
4979             if (attrs) {
4980                 start_force(PL_curforce);
4981                 NEXTVAL_NEXTTOKE.opval = attrs;
4982                 CURMAD('_', PL_nextwhite);
4983                 force_next(THING);
4984             }
4985 #ifdef PERL_MAD
4986             if (PL_madskills) {
4987                 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
4988                                      (s - SvPVX(PL_linestr)) - stuffstart);
4989             }
4990 #endif
4991             TOKEN(COLONATTR);
4992         }
4993         OPERATOR(':');
4994     case '(':
4995         s++;
4996         if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
4997             PL_oldbufptr = PL_oldoldbufptr;             /* allow print(STDOUT 123) */
4998         else
4999             PL_expect = XTERM;
5000         s = SKIPSPACE1(s);
5001         TOKEN('(');
5002     case ';':
5003         CLINE;
5004         {
5005             const char tmp = *s++;
5006             OPERATOR(tmp);
5007         }
5008     case ')':
5009         {
5010             const char tmp = *s++;
5011             s = SKIPSPACE1(s);
5012             if (*s == '{')
5013                 PREBLOCK(tmp);
5014             TERM(tmp);
5015         }
5016     case ']':
5017         s++;
5018         if (PL_lex_brackets <= 0)
5019             yyerror("Unmatched right square bracket");
5020         else
5021             --PL_lex_brackets;
5022         if (PL_lex_state == LEX_INTERPNORMAL) {
5023             if (PL_lex_brackets == 0) {
5024                 if (*s == '-' && s[1] == '>')
5025                     PL_lex_state = LEX_INTERPENDMAYBE;
5026                 else if (*s != '[' && *s != '{')
5027                     PL_lex_state = LEX_INTERPEND;
5028             }
5029         }
5030         TERM(']');
5031     case '{':
5032       leftbracket:
5033         s++;
5034         if (PL_lex_brackets > 100) {
5035             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5036         }
5037         switch (PL_expect) {
5038         case XTERM:
5039             if (PL_lex_formbrack) {
5040                 s--;
5041                 PRETERMBLOCK(DO);
5042             }
5043             if (PL_oldoldbufptr == PL_last_lop)
5044                 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5045             else
5046                 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5047             OPERATOR(HASHBRACK);
5048         case XOPERATOR:
5049             while (s < PL_bufend && SPACE_OR_TAB(*s))
5050                 s++;
5051             d = s;
5052             PL_tokenbuf[0] = '\0';
5053             if (d < PL_bufend && *d == '-') {
5054                 PL_tokenbuf[0] = '-';
5055                 d++;
5056                 while (d < PL_bufend && SPACE_OR_TAB(*d))
5057                     d++;
5058             }
5059             if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
5060                 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
5061                               FALSE, &len);
5062                 while (d < PL_bufend && SPACE_OR_TAB(*d))
5063                     d++;
5064                 if (*d == '}') {
5065                     const char minus = (PL_tokenbuf[0] == '-');
5066                     s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
5067                     if (minus)
5068                         force_next('-');
5069                 }
5070             }
5071             /* FALL THROUGH */
5072         case XATTRBLOCK:
5073         case XBLOCK:
5074             PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
5075             PL_expect = XSTATE;
5076             break;
5077         case XATTRTERM:
5078         case XTERMBLOCK:
5079             PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5080             PL_expect = XSTATE;
5081             break;
5082         default: {
5083                 const char *t;
5084                 if (PL_oldoldbufptr == PL_last_lop)
5085                     PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5086                 else
5087                     PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5088                 s = SKIPSPACE1(s);
5089                 if (*s == '}') {
5090                     if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5091                         PL_expect = XTERM;
5092                         /* This hack is to get the ${} in the message. */
5093                         PL_bufptr = s+1;
5094                         yyerror("syntax error");
5095                         break;
5096                     }
5097                     OPERATOR(HASHBRACK);
5098                 }
5099                 /* This hack serves to disambiguate a pair of curlies
5100                  * as being a block or an anon hash.  Normally, expectation
5101                  * determines that, but in cases where we're not in a
5102                  * position to expect anything in particular (like inside
5103                  * eval"") we have to resolve the ambiguity.  This code
5104                  * covers the case where the first term in the curlies is a
5105                  * quoted string.  Most other cases need to be explicitly
5106                  * disambiguated by prepending a "+" before the opening
5107                  * curly in order to force resolution as an anon hash.
5108                  *
5109                  * XXX should probably propagate the outer expectation
5110                  * into eval"" to rely less on this hack, but that could
5111                  * potentially break current behavior of eval"".
5112                  * GSAR 97-07-21
5113                  */
5114                 t = s;
5115                 if (*s == '\'' || *s == '"' || *s == '`') {
5116                     /* common case: get past first string, handling escapes */
5117                     for (t++; t < PL_bufend && *t != *s;)
5118                         if (*t++ == '\\' && (*t == '\\' || *t == *s))
5119                             t++;
5120                     t++;
5121                 }
5122                 else if (*s == 'q') {
5123                     if (++t < PL_bufend
5124                         && (!isALNUM(*t)
5125                             || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
5126                                 && !isALNUM(*t))))
5127                     {
5128                         /* skip q//-like construct */
5129                         const char *tmps;
5130                         char open, close, term;
5131                         I32 brackets = 1;
5132
5133                         while (t < PL_bufend && isSPACE(*t))
5134                             t++;
5135                         /* check for q => */
5136                         if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5137                             OPERATOR(HASHBRACK);
5138                         }
5139                         term = *t;
5140                         open = term;
5141                         if (term && (tmps = strchr("([{< )]}> )]}>",term)))
5142                             term = tmps[5];
5143                         close = term;
5144                         if (open == close)
5145                             for (t++; t < PL_bufend; t++) {
5146                                 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
5147                                     t++;
5148                                 else if (*t == open)
5149                                     break;
5150                             }
5151                         else {
5152                             for (t++; t < PL_bufend; t++) {
5153                                 if (*t == '\\' && t+1 < PL_bufend)
5154                                     t++;
5155                                 else if (*t == close && --brackets <= 0)
5156                                     break;
5157                                 else if (*t == open)
5158                                     brackets++;
5159                             }
5160                         }
5161                         t++;
5162                     }
5163                     else
5164                         /* skip plain q word */
5165                         while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5166                              t += UTF8SKIP(t);
5167                 }
5168                 else if (isALNUM_lazy_if(t,UTF)) {
5169                     t += UTF8SKIP(t);
5170                     while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5171                          t += UTF8SKIP(t);
5172                 }
5173                 while (t < PL_bufend && isSPACE(*t))
5174                     t++;
5175                 /* if comma follows first term, call it an anon hash */
5176                 /* XXX it could be a comma expression with loop modifiers */
5177                 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
5178                                    || (*t == '=' && t[1] == '>')))
5179                     OPERATOR(HASHBRACK);
5180                 if (PL_expect == XREF)
5181                     PL_expect = XTERM;
5182                 else {
5183                     PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
5184                     PL_expect = XSTATE;
5185                 }
5186             }
5187             break;
5188         }
5189         pl_yylval.ival = CopLINE(PL_curcop);
5190         if (isSPACE(*s) || *s == '#')
5191             PL_copline = NOLINE;   /* invalidate current command line number */
5192         TOKEN('{');
5193     case '}':
5194       rightbracket:
5195         s++;
5196         if (PL_lex_brackets <= 0)
5197             yyerror("Unmatched right curly bracket");
5198         else
5199             PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
5200         if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
5201             PL_lex_formbrack = 0;
5202         if (PL_lex_state == LEX_INTERPNORMAL) {
5203             if (PL_lex_brackets == 0) {
5204                 if (PL_expect & XFAKEBRACK) {
5205                     PL_expect &= XENUMMASK;
5206                     PL_lex_state = LEX_INTERPEND;
5207                     PL_bufptr = s;
5208 #if 0
5209                     if (PL_madskills) {
5210                         if (!PL_thiswhite)
5211                             PL_thiswhite = newSVpvs("");
5212                         sv_catpvs(PL_thiswhite,"}");
5213                     }
5214 #endif
5215                     return yylex();     /* ignore fake brackets */
5216                 }
5217                 if (*s == '-' && s[1] == '>')
5218                     PL_lex_state = LEX_INTERPENDMAYBE;
5219                 else if (*s != '[' && *s != '{')
5220                     PL_lex_state = LEX_INTERPEND;
5221             }
5222         }
5223         if (PL_expect & XFAKEBRACK) {
5224             PL_expect &= XENUMMASK;
5225             PL_bufptr = s;
5226             return yylex();             /* ignore fake brackets */
5227         }
5228         start_force(PL_curforce);
5229         if (PL_madskills) {
5230             curmad('X', newSVpvn(s-1,1));
5231             CURMAD('_', PL_thiswhite);
5232         }
5233         force_next('}');
5234 #ifdef PERL_MAD
5235         if (!PL_thistoken)
5236             PL_thistoken = newSVpvs("");
5237 #endif
5238         TOKEN(';');
5239     case '&':
5240         s++;
5241         if (*s++ == '&')
5242             AOPERATOR(ANDAND);
5243         s--;
5244         if (PL_expect == XOPERATOR) {
5245             if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
5246                 && isIDFIRST_lazy_if(s,UTF))
5247             {
5248                 CopLINE_dec(PL_curcop);
5249                 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
5250                 CopLINE_inc(PL_curcop);
5251             }
5252             BAop(OP_BIT_AND);
5253         }
5254
5255         s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5256         if (*PL_tokenbuf) {
5257             PL_expect = XOPERATOR;
5258             force_ident(PL_tokenbuf, '&');
5259         }
5260         else
5261             PREREF('&');
5262         pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
5263         TERM('&');
5264
5265     case '|':
5266         s++;
5267         if (*s++ == '|')
5268             AOPERATOR(OROR);
5269         s--;
5270         BOop(OP_BIT_OR);
5271     case '=':
5272         s++;
5273         {
5274             const char tmp = *s++;
5275             if (tmp == '=')
5276                 Eop(OP_EQ);
5277             if (tmp == '>')
5278                 OPERATOR(',');
5279             if (tmp == '~')
5280                 PMop(OP_MATCH);
5281             if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
5282                 && strchr("+-*/%.^&|<",tmp))
5283                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5284                             "Reversed %c= operator",(int)tmp);
5285             s--;
5286             if (PL_expect == XSTATE && isALPHA(tmp) &&
5287                 (s == PL_linestart+1 || s[-2] == '\n') )
5288                 {
5289                     if (PL_in_eval && !PL_rsfp) {
5290                         d = PL_bufend;
5291                         while (s < d) {
5292                             if (*s++ == '\n') {
5293                                 incline(s);
5294                                 if (strnEQ(s,"=cut",4)) {
5295                                     s = strchr(s,'\n');
5296                                     if (s)
5297                                         s++;
5298                                     else
5299                                         s = d;
5300                                     incline(s);
5301                                     goto retry;
5302                                 }
5303                             }
5304                         }
5305                         goto retry;
5306                     }
5307 #ifdef PERL_MAD
5308                     if (PL_madskills) {
5309                         if (!PL_thiswhite)
5310                             PL_thiswhite = newSVpvs("");
5311                         sv_catpvn(PL_thiswhite, PL_linestart,
5312                                   PL_bufend - PL_linestart);
5313                     }
5314 #endif
5315                     s = PL_bufend;
5316                     PL_doextract = TRUE;
5317                     goto retry;
5318                 }
5319         }
5320         if (PL_lex_brackets < PL_lex_formbrack) {
5321             const char *t = s;
5322 #ifdef PERL_STRICT_CR
5323             while (SPACE_OR_TAB(*t))
5324 #else
5325             while (SPACE_OR_TAB(*t) || *t == '\r')
5326 #endif
5327                 t++;
5328             if (*t == '\n' || *t == '#') {
5329                 s--;
5330                 PL_expect = XBLOCK;
5331                 goto leftbracket;
5332             }
5333         }
5334         pl_yylval.ival = 0;
5335         OPERATOR(ASSIGNOP);
5336     case '!':
5337         s++;
5338         {
5339             const char tmp = *s++;
5340             if (tmp == '=') {
5341                 /* was this !=~ where !~ was meant?
5342                  * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
5343
5344                 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
5345                     const char *t = s+1;
5346
5347                     while (t < PL_bufend && isSPACE(*t))
5348                         ++t;
5349
5350                     if (*t == '/' || *t == '?' ||
5351                         ((*t == 'm' || *t == 's' || *t == 'y')
5352                          && !isALNUM(t[1])) ||
5353                         (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
5354                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5355                                     "!=~ should be !~");
5356                 }
5357                 Eop(OP_NE);
5358             }
5359             if (tmp == '~')
5360                 PMop(OP_NOT);
5361         }
5362         s--;
5363         OPERATOR('!');
5364     case '<':
5365         if (PL_expect != XOPERATOR) {
5366             if (s[1] != '<' && !strchr(s,'>'))
5367                 check_uni();
5368             if (s[1] == '<')
5369                 s = scan_heredoc(s);
5370             else
5371                 s = scan_inputsymbol(s);
5372             TERM(sublex_start());
5373         }
5374         s++;
5375         {
5376             char tmp = *s++;
5377             if (tmp == '<')
5378                 SHop(OP_LEFT_SHIFT);
5379             if (tmp == '=') {
5380                 tmp = *s++;
5381                 if (tmp == '>')
5382                     Eop(OP_NCMP);
5383                 s--;
5384                 Rop(OP_LE);
5385             }
5386         }
5387         s--;
5388         Rop(OP_LT);
5389     case '>':
5390         s++;
5391         {
5392             const char tmp = *s++;
5393             if (tmp == '>')
5394                 SHop(OP_RIGHT_SHIFT);
5395             else if (tmp == '=')
5396                 Rop(OP_GE);
5397         }
5398         s--;
5399         Rop(OP_GT);
5400
5401     case '$':
5402         CLINE;
5403
5404         if (PL_expect == XOPERATOR) {
5405             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5406                 return deprecate_commaless_var_list();
5407             }
5408         }
5409
5410         if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
5411             PL_tokenbuf[0] = '@';
5412             s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
5413                            sizeof PL_tokenbuf - 1, FALSE);
5414             if (PL_expect == XOPERATOR)
5415                 no_op("Array length", s);
5416             if (!PL_tokenbuf[1])
5417                 PREREF(DOLSHARP);
5418             PL_expect = XOPERATOR;
5419             PL_pending_ident = '#';
5420             TOKEN(DOLSHARP);
5421         }
5422
5423         PL_tokenbuf[0] = '$';
5424         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5425                        sizeof PL_tokenbuf - 1, FALSE);
5426         if (PL_expect == XOPERATOR)
5427             no_op("Scalar", s);
5428         if (!PL_tokenbuf[1]) {
5429             if (s == PL_bufend)
5430                 yyerror("Final $ should be \\$ or $name");
5431             PREREF('$');
5432         }
5433
5434         /* This kludge not intended to be bulletproof. */
5435         if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
5436             pl_yylval.opval = newSVOP(OP_CONST, 0,
5437                                    newSViv(CopARYBASE_get(&PL_compiling)));
5438             pl_yylval.opval->op_private = OPpCONST_ARYBASE;
5439             TERM(THING);
5440         }
5441
5442         d = s;
5443         {
5444             const char tmp = *s;
5445             if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
5446                 s = SKIPSPACE1(s);
5447
5448             if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
5449                 && intuit_more(s)) {
5450                 if (*s == '[') {
5451                     PL_tokenbuf[0] = '@';
5452                     if (ckWARN(WARN_SYNTAX)) {
5453                         char *t = s+1;
5454
5455                         while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
5456                             t++;
5457                         if (*t++ == ',') {
5458                             PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
5459                             while (t < PL_bufend && *t != ']')
5460                                 t++;
5461                             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5462                                         "Multidimensional syntax %.*s not supported",
5463                                     (int)((t - PL_bufptr) + 1), PL_bufptr);
5464                         }
5465                     }
5466                 }
5467                 else if (*s == '{') {
5468                     char *t;
5469                     PL_tokenbuf[0] = '%';
5470                     if (strEQ(PL_tokenbuf+1, "SIG")  && ckWARN(WARN_SYNTAX)
5471                         && (t = strchr(s, '}')) && (t = strchr(t, '=')))
5472                         {
5473                             char tmpbuf[sizeof PL_tokenbuf];
5474                             do {
5475                                 t++;
5476                             } while (isSPACE(*t));
5477                             if (isIDFIRST_lazy_if(t,UTF)) {
5478                                 STRLEN len;
5479                                 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
5480                                               &len);
5481                                 while (isSPACE(*t))
5482                                     t++;
5483                                 if (*t == ';' && get_cvn_flags(tmpbuf, len, 0))
5484                                     Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5485                                                 "You need to quote \"%s\"",
5486                                                 tmpbuf);
5487                             }
5488                         }
5489                 }
5490             }
5491
5492             PL_expect = XOPERATOR;
5493             if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
5494                 const bool islop = (PL_last_lop == PL_oldoldbufptr);
5495                 if (!islop || PL_last_lop_op == OP_GREPSTART)
5496                     PL_expect = XOPERATOR;
5497                 else if (strchr("$@\"'`q", *s))
5498                     PL_expect = XTERM;          /* e.g. print $fh "foo" */
5499                 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
5500                     PL_expect = XTERM;          /* e.g. print $fh &sub */
5501                 else if (isIDFIRST_lazy_if(s,UTF)) {
5502                     char tmpbuf[sizeof PL_tokenbuf];
5503                     int t2;
5504                     scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5505                     if ((t2 = keyword(tmpbuf, len, 0))) {
5506                         /* binary operators exclude handle interpretations */
5507                         switch (t2) {
5508                         case -KEY_x:
5509                         case -KEY_eq:
5510                         case -KEY_ne:
5511                         case -KEY_gt:
5512                         case -KEY_lt:
5513                         case -KEY_ge:
5514                         case -KEY_le:
5515                         case -KEY_cmp:
5516                             break;
5517                         default:
5518                             PL_expect = XTERM;  /* e.g. print $fh length() */
5519                             break;
5520                         }
5521                     }
5522                     else {
5523                         PL_expect = XTERM;      /* e.g. print $fh subr() */
5524                     }
5525                 }
5526                 else if (isDIGIT(*s))
5527                     PL_expect = XTERM;          /* e.g. print $fh 3 */
5528                 else if (*s == '.' && isDIGIT(s[1]))
5529                     PL_expect = XTERM;          /* e.g. print $fh .3 */
5530                 else if ((*s == '?' || *s == '-' || *s == '+')
5531                          && !isSPACE(s[1]) && s[1] != '=')
5532                     PL_expect = XTERM;          /* e.g. print $fh -1 */
5533                 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
5534                          && s[1] != '/')
5535                     PL_expect = XTERM;          /* e.g. print $fh /.../
5536                                                    XXX except DORDOR operator
5537                                                 */
5538                 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
5539                          && s[2] != '=')
5540                     PL_expect = XTERM;          /* print $fh <<"EOF" */
5541             }
5542         }
5543         PL_pending_ident = '$';
5544         TOKEN('$');
5545
5546     case '@':
5547         if (PL_expect == XOPERATOR)
5548             no_op("Array", s);
5549         PL_tokenbuf[0] = '@';
5550         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
5551         if (!PL_tokenbuf[1]) {
5552             PREREF('@');
5553         }
5554         if (PL_lex_state == LEX_NORMAL)
5555             s = SKIPSPACE1(s);
5556         if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
5557             if (*s == '{')
5558                 PL_tokenbuf[0] = '%';
5559
5560             /* Warn about @ where they meant $. */
5561             if (*s == '[' || *s == '{') {
5562                 if (ckWARN(WARN_SYNTAX)) {
5563                     const char *t = s + 1;
5564                     while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
5565                         t++;
5566                     if (*t == '}' || *t == ']') {
5567                         t++;
5568                         PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
5569                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5570                             "Scalar value %.*s better written as $%.*s",
5571                             (int)(t-PL_bufptr), PL_bufptr,
5572                             (int)(t-PL_bufptr-1), PL_bufptr+1);
5573                     }
5574                 }
5575             }
5576         }
5577         PL_pending_ident = '@';
5578         TERM('@');
5579
5580      case '/':                  /* may be division, defined-or, or pattern */
5581         if (PL_expect == XTERMORDORDOR && s[1] == '/') {
5582             s += 2;
5583             AOPERATOR(DORDOR);
5584         }
5585      case '?':                  /* may either be conditional or pattern */
5586         if (PL_expect == XOPERATOR) {
5587              char tmp = *s++;
5588              if(tmp == '?') {
5589                 OPERATOR('?');
5590              }
5591              else {
5592                  tmp = *s++;
5593                  if(tmp == '/') {
5594                      /* A // operator. */
5595                     AOPERATOR(DORDOR);
5596                  }
5597                  else {
5598                      s--;
5599                      Mop(OP_DIVIDE);
5600                  }
5601              }
5602          }
5603          else {
5604              /* Disable warning on "study /blah/" */
5605              if (PL_oldoldbufptr == PL_last_uni
5606               && (*PL_last_uni != 's' || s - PL_last_uni < 5
5607                   || memNE(PL_last_uni, "study", 5)
5608                   || isALNUM_lazy_if(PL_last_uni+5,UTF)
5609               ))
5610                  check_uni();
5611              s = scan_pat(s,OP_MATCH);
5612              TERM(sublex_start());
5613          }
5614
5615     case '.':
5616         if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
5617 #ifdef PERL_STRICT_CR
5618             && s[1] == '\n'
5619 #else
5620             && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
5621 #endif
5622             && (s == PL_linestart || s[-1] == '\n') )
5623         {
5624             PL_lex_formbrack = 0;
5625             PL_expect = XSTATE;
5626             goto rightbracket;
5627         }
5628         if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
5629             s += 3;
5630             OPERATOR(YADAYADA);
5631         }
5632         if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
5633             char tmp = *s++;
5634             if (*s == tmp) {
5635                 s++;
5636                 if (*s == tmp) {
5637                     s++;
5638                     pl_yylval.ival = OPf_SPECIAL;
5639                 }
5640                 else
5641                     pl_yylval.ival = 0;
5642                 OPERATOR(DOTDOT);
5643             }
5644             if (PL_expect != XOPERATOR)
5645                 check_uni();
5646             Aop(OP_CONCAT);
5647         }
5648         /* FALL THROUGH */
5649     case '0': case '1': case '2': case '3': case '4':
5650     case '5': case '6': case '7': case '8': case '9':
5651         s = scan_num(s, &pl_yylval);
5652         DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
5653         if (PL_expect == XOPERATOR)
5654             no_op("Number",s);
5655         TERM(THING);
5656
5657     case '\'':
5658         s = scan_str(s,!!PL_madskills,FALSE);
5659         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
5660         if (PL_expect == XOPERATOR) {
5661             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5662                 return deprecate_commaless_var_list();
5663             }
5664             else
5665                 no_op("String",s);
5666         }
5667         if (!s)
5668             missingterm(NULL);
5669         pl_yylval.ival = OP_CONST;
5670         TERM(sublex_start());
5671
5672     case '"':
5673         s = scan_str(s,!!PL_madskills,FALSE);
5674         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
5675         if (PL_expect == XOPERATOR) {
5676             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5677                 return deprecate_commaless_var_list();
5678             }
5679             else
5680                 no_op("String",s);
5681         }
5682         if (!s)
5683             missingterm(NULL);
5684         pl_yylval.ival = OP_CONST;
5685         /* FIXME. I think that this can be const if char *d is replaced by
5686            more localised variables.  */
5687         for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
5688             if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
5689                 pl_yylval.ival = OP_STRINGIFY;
5690                 break;
5691             }
5692         }
5693         TERM(sublex_start());
5694
5695     case '`':
5696         s = scan_str(s,!!PL_madskills,FALSE);
5697         DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
5698         if (PL_expect == XOPERATOR)
5699             no_op("Backticks",s);
5700         if (!s)
5701             missingterm(NULL);
5702         readpipe_override();
5703         TERM(sublex_start());
5704
5705     case '\\':
5706         s++;
5707         if (PL_lex_inwhat && isDIGIT(*s))
5708             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
5709                            *s, *s);
5710         if (PL_expect == XOPERATOR)
5711             no_op("Backslash",s);
5712         OPERATOR(REFGEN);
5713
5714     case 'v':
5715         if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
5716             char *start = s + 2;
5717             while (isDIGIT(*start) || *start == '_')
5718                 start++;
5719             if (*start == '.' && isDIGIT(start[1])) {
5720                 s = scan_num(s, &pl_yylval);
5721                 TERM(THING);
5722             }
5723             /* avoid v123abc() or $h{v1}, allow C<print v10;> */
5724             else if (!isALPHA(*start) && (PL_expect == XTERM
5725                         || PL_expect == XREF || PL_expect == XSTATE
5726                         || PL_expect == XTERMORDORDOR)) {
5727                 GV *const gv = gv_fetchpvn_flags(s, start - s, 0, SVt_PVCV);
5728                 if (!gv) {
5729                     s = scan_num(s, &pl_yylval);
5730                     TERM(THING);
5731                 }
5732             }
5733         }
5734         goto keylookup;
5735     case 'x':
5736         if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
5737             s++;
5738             Mop(OP_REPEAT);
5739         }
5740         goto keylookup;
5741
5742     case '_':
5743     case 'a': case 'A':
5744     case 'b': case 'B':
5745     case 'c': case 'C':
5746     case 'd': case 'D':
5747     case 'e': case 'E':
5748     case 'f': case 'F':
5749     case 'g': case 'G':
5750     case 'h': case 'H':
5751     case 'i': case 'I':
5752     case 'j': case 'J':
5753     case 'k': case 'K':
5754     case 'l': case 'L':
5755     case 'm': case 'M':
5756     case 'n': case 'N':
5757     case 'o': case 'O':
5758     case 'p': case 'P':
5759     case 'q': case 'Q':
5760     case 'r': case 'R':
5761     case 's': case 'S':
5762     case 't': case 'T':
5763     case 'u': case 'U':
5764               case 'V':
5765     case 'w': case 'W':
5766               case 'X':
5767     case 'y': case 'Y':
5768     case 'z': case 'Z':
5769
5770       keylookup: {
5771         bool anydelim;
5772         I32 tmp;
5773
5774         orig_keyword = 0;
5775         gv = NULL;
5776         gvp = NULL;
5777
5778         PL_bufptr = s;
5779         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5780
5781         /* Some keywords can be followed by any delimiter, including ':' */
5782         anydelim = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) ||
5783                (len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
5784                              (PL_tokenbuf[0] == 'q' &&
5785                               strchr("qwxr", PL_tokenbuf[1])))));
5786
5787         /* x::* is just a word, unless x is "CORE" */
5788         if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
5789             goto just_a_word;
5790
5791         d = s;
5792         while (d < PL_bufend && isSPACE(*d))
5793                 d++;    /* no comments skipped here, or s### is misparsed */
5794
5795         /* Is this a word before a => operator? */
5796         if (*d == '=' && d[1] == '>') {
5797             CLINE;
5798             pl_yylval.opval
5799                 = (OP*)newSVOP(OP_CONST, 0,
5800                                S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
5801             pl_yylval.opval->op_private = OPpCONST_BARE;
5802             TERM(WORD);
5803         }
5804
5805         /* Check for plugged-in keyword */
5806         {
5807             OP *o;
5808             int result;
5809             char *saved_bufptr = PL_bufptr;
5810             PL_bufptr = s;
5811             result = CALL_FPTR(PL_keyword_plugin)(aTHX_ PL_tokenbuf, len, &o);
5812             s = PL_bufptr;
5813             if (result == KEYWORD_PLUGIN_DECLINE) {
5814                 /* not a plugged-in keyword */
5815                 PL_bufptr = saved_bufptr;
5816             } else if (result == KEYWORD_PLUGIN_STMT) {
5817                 pl_yylval.opval = o;
5818                 CLINE;
5819                 PL_expect = XSTATE;
5820                 return REPORT(PLUGSTMT);
5821             } else if (result == KEYWORD_PLUGIN_EXPR) {
5822                 pl_yylval.opval = o;
5823                 CLINE;
5824                 PL_expect = XOPERATOR;
5825                 return REPORT(PLUGEXPR);
5826             } else {
5827                 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
5828                                         PL_tokenbuf);
5829             }
5830         }
5831
5832         /* Check for built-in keyword */
5833         tmp = keyword(PL_tokenbuf, len, 0);
5834
5835         /* Is this a label? */
5836         if (!anydelim && PL_expect == XSTATE
5837               && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
5838             if (tmp)
5839                 Perl_croak(aTHX_ "Can't use keyword '%s' as a label", PL_tokenbuf);
5840             s = d + 1;
5841             pl_yylval.pval = CopLABEL_alloc(PL_tokenbuf);
5842             CLINE;
5843             TOKEN(LABEL);
5844         }
5845
5846         if (tmp < 0) {                  /* second-class keyword? */
5847             GV *ogv = NULL;     /* override (winner) */
5848             GV *hgv = NULL;     /* hidden (loser) */
5849             if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
5850                 CV *cv;
5851                 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVCV)) &&
5852                     (cv = GvCVu(gv)))
5853                 {
5854                     if (GvIMPORTED_CV(gv))
5855                         ogv = gv;
5856                     else if (! CvMETHOD(cv))
5857                         hgv = gv;
5858                 }
5859                 if (!ogv &&
5860                     (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
5861                     (gv = *gvp) && isGV_with_GP(gv) &&
5862                     GvCVu(gv) && GvIMPORTED_CV(gv))
5863                 {
5864                     ogv = gv;
5865                 }
5866             }
5867             if (ogv) {
5868                 orig_keyword = tmp;
5869                 tmp = 0;                /* overridden by import or by GLOBAL */
5870             }
5871             else if (gv && !gvp
5872                      && -tmp==KEY_lock  /* XXX generalizable kludge */
5873                      && GvCVu(gv))
5874             {
5875                 tmp = 0;                /* any sub overrides "weak" keyword */
5876             }
5877             else {                      /* no override */
5878                 tmp = -tmp;
5879                 if (tmp == KEY_dump) {
5880                     Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
5881                                    "dump() better written as CORE::dump()");
5882                 }
5883                 gv = NULL;
5884                 gvp = 0;
5885                 if (hgv && tmp != KEY_x && tmp != KEY_CORE)     /* never ambiguous */
5886                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5887                                    "Ambiguous call resolved as CORE::%s(), %s",
5888                                    GvENAME(hgv), "qualify as such or use &");
5889             }
5890         }
5891
5892       reserved_word:
5893         switch (tmp) {
5894
5895         default:                        /* not a keyword */
5896             /* Trade off - by using this evil construction we can pull the
5897                variable gv into the block labelled keylookup. If not, then
5898                we have to give it function scope so that the goto from the
5899                earlier ':' case doesn't bypass the initialisation.  */
5900             if (0) {
5901             just_a_word_zero_gv:
5902                 gv = NULL;
5903                 gvp = NULL;
5904                 orig_keyword = 0;
5905             }
5906           just_a_word: {
5907                 SV *sv;
5908                 int pkgname = 0;
5909                 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
5910                 OP *rv2cv_op;
5911                 CV *cv;
5912 #ifdef PERL_MAD
5913                 SV *nextPL_nextwhite = 0;
5914 #endif
5915
5916
5917                 /* Get the rest if it looks like a package qualifier */
5918
5919                 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
5920                     STRLEN morelen;
5921                     s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
5922                                   TRUE, &morelen);
5923                     if (!morelen)
5924                         Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
5925                                 *s == '\'' ? "'" : "::");
5926                     len += morelen;
5927                     pkgname = 1;
5928                 }
5929
5930                 if (PL_expect == XOPERATOR) {
5931                     if (PL_bufptr == PL_linestart) {
5932                         CopLINE_dec(PL_curcop);
5933                         Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
5934                         CopLINE_inc(PL_curcop);
5935                     }
5936                     else
5937                         no_op("Bareword",s);
5938                 }
5939
5940                 /* Look for a subroutine with this name in current package,
5941                    unless name is "Foo::", in which case Foo is a bearword
5942                    (and a package name). */
5943
5944                 if (len > 2 && !PL_madskills &&
5945                     PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
5946                 {
5947                     if (ckWARN(WARN_BAREWORD)
5948                         && ! gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVHV))
5949                         Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
5950                             "Bareword \"%s\" refers to nonexistent package",
5951                              PL_tokenbuf);
5952                     len -= 2;
5953                     PL_tokenbuf[len] = '\0';
5954                     gv = NULL;
5955                     gvp = 0;
5956                 }
5957                 else {
5958                     if (!gv) {
5959                         /* Mustn't actually add anything to a symbol table.
5960                            But also don't want to "initialise" any placeholder
5961                            constants that might already be there into full
5962                            blown PVGVs with attached PVCV.  */
5963                         gv = gv_fetchpvn_flags(PL_tokenbuf, len,
5964                                                GV_NOADD_NOINIT, SVt_PVCV);
5965                     }
5966                     len = 0;
5967                 }
5968
5969                 /* if we saw a global override before, get the right name */
5970
5971                 if (gvp) {
5972                     sv = newSVpvs("CORE::GLOBAL::");
5973                     sv_catpv(sv,PL_tokenbuf);
5974                 }
5975                 else {
5976                     /* If len is 0, newSVpv does strlen(), which is correct.
5977                        If len is non-zero, then it will be the true length,
5978                        and so the scalar will be created correctly.  */
5979                     sv = newSVpv(PL_tokenbuf,len);
5980                 }
5981 #ifdef PERL_MAD
5982                 if (PL_madskills && !PL_thistoken) {
5983                     char *start = SvPVX(PL_linestr) + PL_realtokenstart;
5984                     PL_thistoken = newSVpvn(start,s - start);
5985                     PL_realtokenstart = s - SvPVX(PL_linestr);
5986                 }
5987 #endif
5988
5989                 /* Presume this is going to be a bareword of some sort. */
5990
5991                 CLINE;
5992                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
5993                 pl_yylval.opval->op_private = OPpCONST_BARE;
5994                 /* UTF-8 package name? */
5995                 if (UTF && !IN_BYTES &&
5996                     is_utf8_string((U8*)SvPVX_const(sv), SvCUR(sv)))
5997                     SvUTF8_on(sv);
5998
5999                 /* And if "Foo::", then that's what it certainly is. */
6000
6001                 if (len)
6002                     goto safe_bareword;
6003
6004                 cv = NULL;
6005                 {
6006                     OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc(sv));
6007                     const_op->op_private = OPpCONST_BARE;
6008                     rv2cv_op = newCVREF(0, const_op);
6009                 }
6010                 if (rv2cv_op->op_type == OP_RV2CV &&
6011                         (rv2cv_op->op_flags & OPf_KIDS)) {
6012                     OP *rv_op = cUNOPx(rv2cv_op)->op_first;
6013                     switch (rv_op->op_type) {
6014                         case OP_CONST: {
6015                             SV *sv = cSVOPx_sv(rv_op);
6016                             if (SvROK(sv) && SvTYPE(SvRV(sv)) == SVt_PVCV)
6017                                 cv = (CV*)SvRV(sv);
6018                         } break;
6019                         case OP_GV: {
6020                             GV *gv = cGVOPx_gv(rv_op);
6021                             CV *maybe_cv = GvCVu(gv);
6022                             if (maybe_cv && SvTYPE((SV*)maybe_cv) == SVt_PVCV)
6023                                 cv = maybe_cv;
6024                         } break;
6025                     }
6026                 }
6027
6028                 /* See if it's the indirect object for a list operator. */
6029
6030                 if (PL_oldoldbufptr &&
6031                     PL_oldoldbufptr < PL_bufptr &&
6032                     (PL_oldoldbufptr == PL_last_lop
6033                      || PL_oldoldbufptr == PL_last_uni) &&
6034                     /* NO SKIPSPACE BEFORE HERE! */
6035                     (PL_expect == XREF ||
6036                      ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
6037                 {
6038                     bool immediate_paren = *s == '(';
6039
6040                     /* (Now we can afford to cross potential line boundary.) */
6041                     s = SKIPSPACE2(s,nextPL_nextwhite);
6042 #ifdef PERL_MAD
6043                     PL_nextwhite = nextPL_nextwhite;    /* assume no & deception */
6044 #endif
6045
6046                     /* Two barewords in a row may indicate method call. */
6047
6048                     if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
6049                         (tmp = intuit_method(s, gv, cv))) {
6050                         op_free(rv2cv_op);
6051                         return REPORT(tmp);
6052                     }
6053
6054                     /* If not a declared subroutine, it's an indirect object. */
6055                     /* (But it's an indir obj regardless for sort.) */
6056                     /* Also, if "_" follows a filetest operator, it's a bareword */
6057
6058                     if (
6059                         ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
6060                          (!cv &&
6061                         (PL_last_lop_op != OP_MAPSTART &&
6062                          PL_last_lop_op != OP_GREPSTART))))
6063                        || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
6064                             && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
6065                        )
6066                     {
6067                         PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
6068                         goto bareword;
6069                     }
6070                 }
6071
6072                 PL_expect = XOPERATOR;
6073 #ifdef PERL_MAD
6074                 if (isSPACE(*s))
6075                     s = SKIPSPACE2(s,nextPL_nextwhite);
6076                 PL_nextwhite = nextPL_nextwhite;
6077 #else
6078                 s = skipspace(s);
6079 #endif
6080
6081                 /* Is this a word before a => operator? */
6082                 if (*s == '=' && s[1] == '>' && !pkgname) {
6083                     op_free(rv2cv_op);
6084                     CLINE;
6085                     sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
6086                     if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
6087                       SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
6088                     TERM(WORD);
6089                 }
6090
6091                 /* If followed by a paren, it's certainly a subroutine. */
6092                 if (*s == '(') {
6093                     CLINE;
6094                     if (cv) {
6095                         d = s + 1;
6096                         while (SPACE_OR_TAB(*d))
6097                             d++;
6098                         if (*d == ')' && (sv = cv_const_sv(cv))) {
6099                             s = d + 1;
6100                             goto its_constant;
6101                         }
6102                     }
6103 #ifdef PERL_MAD
6104                     if (PL_madskills) {
6105                         PL_nextwhite = PL_thiswhite;
6106                         PL_thiswhite = 0;
6107                     }
6108                     start_force(PL_curforce);
6109 #endif
6110                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6111                     PL_expect = XOPERATOR;
6112 #ifdef PERL_MAD
6113                     if (PL_madskills) {
6114                         PL_nextwhite = nextPL_nextwhite;
6115                         curmad('X', PL_thistoken);
6116                         PL_thistoken = newSVpvs("");
6117                     }
6118 #endif
6119                     op_free(rv2cv_op);
6120                     force_next(WORD);
6121                     pl_yylval.ival = 0;
6122                     TOKEN('&');
6123                 }
6124
6125                 /* If followed by var or block, call it a method (unless sub) */
6126
6127                 if ((*s == '$' || *s == '{') && !cv) {
6128                     op_free(rv2cv_op);
6129                     PL_last_lop = PL_oldbufptr;
6130                     PL_last_lop_op = OP_METHOD;
6131                     PREBLOCK(METHOD);
6132                 }
6133
6134                 /* If followed by a bareword, see if it looks like indir obj. */
6135
6136                 if (!orig_keyword
6137                         && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
6138                         && (tmp = intuit_method(s, gv, cv))) {
6139                     op_free(rv2cv_op);
6140                     return REPORT(tmp);
6141                 }
6142
6143                 /* Not a method, so call it a subroutine (if defined) */
6144
6145                 if (cv) {
6146                     if (lastchar == '-')
6147                         Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6148                                          "Ambiguous use of -%s resolved as -&%s()",
6149                                          PL_tokenbuf, PL_tokenbuf);
6150                     /* Check for a constant sub */
6151                     if ((sv = cv_const_sv(cv))) {
6152                   its_constant:
6153                         op_free(rv2cv_op);
6154                         SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
6155                         ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
6156                         pl_yylval.opval->op_private = 0;
6157                         TOKEN(WORD);
6158                     }
6159
6160                     op_free(pl_yylval.opval);
6161                     pl_yylval.opval = rv2cv_op;
6162                     pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
6163                     PL_last_lop = PL_oldbufptr;
6164                     PL_last_lop_op = OP_ENTERSUB;
6165                     /* Is there a prototype? */
6166                     if (
6167 #ifdef PERL_MAD
6168                         cv &&
6169 #endif
6170                         SvPOK(cv))
6171                     {
6172                         STRLEN protolen;
6173                         const char *proto = SvPV_const(MUTABLE_SV(cv), protolen);
6174                         if (!protolen)
6175                             TERM(FUNC0SUB);
6176                         if ((*proto == '$' || *proto == '_') && proto[1] == '\0')
6177                             OPERATOR(UNIOPSUB);
6178                         while (*proto == ';')
6179                             proto++;
6180                         if (*proto == '&' && *s == '{') {
6181                             if (PL_curstash)
6182                                 sv_setpvs(PL_subname, "__ANON__");
6183                             else
6184                                 sv_setpvs(PL_subname, "__ANON__::__ANON__");
6185                             PREBLOCK(LSTOPSUB);
6186                         }
6187                     }
6188 #ifdef PERL_MAD
6189                     {
6190                         if (PL_madskills) {
6191                             PL_nextwhite = PL_thiswhite;
6192                             PL_thiswhite = 0;
6193                         }
6194                         start_force(PL_curforce);
6195                         NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6196                         PL_expect = XTERM;
6197                         if (PL_madskills) {
6198                             PL_nextwhite = nextPL_nextwhite;
6199                             curmad('X', PL_thistoken);
6200                             PL_thistoken = newSVpvs("");
6201                         }
6202                         force_next(WORD);
6203                         TOKEN(NOAMP);
6204                     }
6205                 }
6206
6207                 /* Guess harder when madskills require "best effort". */
6208                 if (PL_madskills && (!gv || !GvCVu(gv))) {
6209                     int probable_sub = 0;
6210                     if (strchr("\"'`$@%0123456789!*+{[<", *s))
6211                         probable_sub = 1;
6212                     else if (isALPHA(*s)) {
6213                         char tmpbuf[1024];
6214                         STRLEN tmplen;
6215                         d = s;
6216                         d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
6217                         if (!keyword(tmpbuf, tmplen, 0))
6218                             probable_sub = 1;
6219                         else {
6220                             while (d < PL_bufend && isSPACE(*d))
6221                                 d++;
6222                             if (*d == '=' && d[1] == '>')
6223                                 probable_sub = 1;
6224                         }
6225                     }
6226                     if (probable_sub) {
6227                         gv = gv_fetchpv(PL_tokenbuf, GV_ADD, SVt_PVCV);
6228                         op_free(pl_yylval.opval);
6229                         pl_yylval.opval = rv2cv_op;
6230                         pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
6231                         PL_last_lop = PL_oldbufptr;
6232                         PL_last_lop_op = OP_ENTERSUB;
6233                         PL_nextwhite = PL_thiswhite;
6234                         PL_thiswhite = 0;
6235                         start_force(PL_curforce);
6236                         NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6237                         PL_expect = XTERM;
6238                         PL_nextwhite = nextPL_nextwhite;
6239                         curmad('X', PL_thistoken);
6240                         PL_thistoken = newSVpvs("");
6241                         force_next(WORD);
6242                         TOKEN(NOAMP);
6243                     }
6244 #else
6245                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6246                     PL_expect = XTERM;
6247                     force_next(WORD);
6248                     TOKEN(NOAMP);
6249 #endif
6250                 }
6251
6252                 /* Call it a bare word */
6253
6254                 if (PL_hints & HINT_STRICT_SUBS)
6255                     pl_yylval.opval->op_private |= OPpCONST_STRICT;
6256                 else {
6257                 bareword:
6258                     /* after "print" and similar functions (corresponding to
6259                      * "F? L" in opcode.pl), whatever wasn't already parsed as
6260                      * a filehandle should be subject to "strict subs".
6261                      * Likewise for the optional indirect-object argument to system
6262                      * or exec, which can't be a bareword */
6263                     if ((PL_last_lop_op == OP_PRINT
6264                             || PL_last_lop_op == OP_PRTF
6265                             || PL_last_lop_op == OP_SAY
6266                             || PL_last_lop_op == OP_SYSTEM
6267                             || PL_last_lop_op == OP_EXEC)
6268                             && (PL_hints & HINT_STRICT_SUBS))
6269                         pl_yylval.opval->op_private |= OPpCONST_STRICT;
6270                     if (lastchar != '-') {
6271                         if (ckWARN(WARN_RESERVED)) {
6272                             d = PL_tokenbuf;
6273                             while (isLOWER(*d))
6274                                 d++;
6275                             if (!*d && !gv_stashpv(PL_tokenbuf, 0))
6276                                 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
6277                                        PL_tokenbuf);
6278                         }
6279                     }
6280                 }
6281                 op_free(rv2cv_op);
6282
6283             safe_bareword:
6284                 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
6285                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6286                                      "Operator or semicolon missing before %c%s",
6287                                      lastchar, PL_tokenbuf);
6288                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6289                                      "Ambiguous use of %c resolved as operator %c",
6290                                      lastchar, lastchar);
6291                 }
6292                 TOKEN(WORD);
6293             }
6294
6295         case KEY___FILE__:
6296             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6297                                         newSVpv(CopFILE(PL_curcop),0));
6298             TERM(THING);
6299
6300         case KEY___LINE__:
6301             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6302                                     Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
6303             TERM(THING);
6304
6305         case KEY___PACKAGE__:
6306             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6307                                         (PL_curstash
6308                                          ? newSVhek(HvNAME_HEK(PL_curstash))
6309                                          : &PL_sv_undef));
6310             TERM(THING);
6311
6312         case KEY___DATA__:
6313         case KEY___END__: {
6314             GV *gv;
6315             if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
6316                 const char *pname = "main";
6317                 if (PL_tokenbuf[2] == 'D')
6318                     pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
6319                 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), GV_ADD,
6320                                 SVt_PVIO);
6321                 GvMULTI_on(gv);
6322                 if (!GvIO(gv))
6323                     GvIOp(gv) = newIO();
6324                 IoIFP(GvIOp(gv)) = PL_rsfp;
6325 #if defined(HAS_FCNTL) && defined(F_SETFD)
6326                 {
6327                     const int fd = PerlIO_fileno(PL_rsfp);
6328                     fcntl(fd,F_SETFD,fd >= 3);
6329                 }
6330 #endif
6331                 /* Mark this internal pseudo-handle as clean */
6332                 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
6333                 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
6334                     IoTYPE(GvIOp(gv)) = IoTYPE_STD;
6335                 else
6336                     IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
6337 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
6338                 /* if the script was opened in binmode, we need to revert
6339                  * it to text mode for compatibility; but only iff it has CRs
6340                  * XXX this is a questionable hack at best. */
6341                 if (PL_bufend-PL_bufptr > 2
6342                     && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
6343                 {
6344                     Off_t loc = 0;
6345                     if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
6346                         loc = PerlIO_tell(PL_rsfp);
6347                         (void)PerlIO_seek(PL_rsfp, 0L, 0);
6348                     }
6349 #ifdef NETWARE
6350                         if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
6351 #else
6352                     if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
6353 #endif  /* NETWARE */
6354 #ifdef PERLIO_IS_STDIO /* really? */
6355 #  if defined(__BORLANDC__)
6356                         /* XXX see note in do_binmode() */
6357                         ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
6358 #  endif
6359 #endif
6360                         if (loc > 0)
6361                             PerlIO_seek(PL_rsfp, loc, 0);
6362                     }
6363                 }
6364 #endif
6365 #ifdef PERLIO_LAYERS
6366                 if (!IN_BYTES) {
6367                     if (UTF)
6368                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
6369                     else if (PL_encoding) {
6370                         SV *name;
6371                         dSP;
6372                         ENTER;
6373                         SAVETMPS;
6374                         PUSHMARK(sp);
6375                         EXTEND(SP, 1);
6376                         XPUSHs(PL_encoding);
6377                         PUTBACK;
6378                         call_method("name", G_SCALAR);
6379                         SPAGAIN;
6380                         name = POPs;
6381                         PUTBACK;
6382                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
6383                                             Perl_form(aTHX_ ":encoding(%"SVf")",
6384                                                       SVfARG(name)));
6385                         FREETMPS;
6386                         LEAVE;
6387                     }
6388                 }
6389 #endif
6390 #ifdef PERL_MAD
6391                 if (PL_madskills) {
6392                     if (PL_realtokenstart >= 0) {
6393                         char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
6394                         if (!PL_endwhite)
6395                             PL_endwhite = newSVpvs("");
6396                         sv_catsv(PL_endwhite, PL_thiswhite);
6397                         PL_thiswhite = 0;
6398                         sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
6399                         PL_realtokenstart = -1;
6400                     }
6401                     while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
6402                            != NULL) ;
6403                 }
6404 #endif
6405                 PL_rsfp = NULL;
6406             }
6407             goto fake_eof;
6408         }
6409
6410         case KEY_AUTOLOAD:
6411         case KEY_DESTROY:
6412         case KEY_BEGIN:
6413         case KEY_UNITCHECK:
6414         case KEY_CHECK:
6415         case KEY_INIT:
6416         case KEY_END:
6417             if (PL_expect == XSTATE) {
6418                 s = PL_bufptr;
6419                 goto really_sub;
6420             }
6421             goto just_a_word;
6422
6423         case KEY_CORE:
6424             if (*s == ':' && s[1] == ':') {
6425                 s += 2;
6426                 d = s;
6427                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6428                 if (!(tmp = keyword(PL_tokenbuf, len, 0)))
6429                     Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
6430                 if (tmp < 0)
6431                     tmp = -tmp;
6432                 else if (tmp == KEY_require || tmp == KEY_do)
6433                     /* that's a way to remember we saw "CORE::" */
6434                     orig_keyword = tmp;
6435                 goto reserved_word;
6436             }
6437             goto just_a_word;
6438
6439         case KEY_abs:
6440             UNI(OP_ABS);
6441
6442         case KEY_alarm:
6443             UNI(OP_ALARM);
6444
6445         case KEY_accept:
6446             LOP(OP_ACCEPT,XTERM);
6447
6448         case KEY_and:
6449             OPERATOR(ANDOP);
6450
6451         case KEY_atan2:
6452             LOP(OP_ATAN2,XTERM);
6453
6454         case KEY_bind:
6455             LOP(OP_BIND,XTERM);
6456
6457         case KEY_binmode:
6458             LOP(OP_BINMODE,XTERM);
6459
6460         case KEY_bless:
6461             LOP(OP_BLESS,XTERM);
6462
6463         case KEY_break:
6464             FUN0(OP_BREAK);
6465
6466         case KEY_chop:
6467             UNI(OP_CHOP);
6468
6469         case KEY_continue:
6470             /* When 'use switch' is in effect, continue has a dual
6471                life as a control operator. */
6472             {
6473                 if (!FEATURE_IS_ENABLED("switch"))
6474                     PREBLOCK(CONTINUE);
6475                 else {
6476                     /* We have to disambiguate the two senses of
6477                       "continue". If the next token is a '{' then
6478                       treat it as the start of a continue block;
6479                       otherwise treat it as a control operator.
6480                      */
6481                     s = skipspace(s);
6482                     if (*s == '{')
6483             PREBLOCK(CONTINUE);
6484                     else
6485                         FUN0(OP_CONTINUE);
6486                 }
6487             }
6488
6489         case KEY_chdir:
6490             /* may use HOME */
6491             (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
6492             UNI(OP_CHDIR);
6493
6494         case KEY_close:
6495             UNI(OP_CLOSE);
6496
6497         case KEY_closedir:
6498             UNI(OP_CLOSEDIR);
6499
6500         case KEY_cmp:
6501             Eop(OP_SCMP);
6502
6503         case KEY_caller:
6504             UNI(OP_CALLER);
6505
6506         case KEY_crypt:
6507 #ifdef FCRYPT
6508             if (!PL_cryptseen) {
6509                 PL_cryptseen = TRUE;
6510                 init_des();
6511             }
6512 #endif
6513             LOP(OP_CRYPT,XTERM);
6514
6515         case KEY_chmod:
6516             LOP(OP_CHMOD,XTERM);
6517
6518         case KEY_chown:
6519             LOP(OP_CHOWN,XTERM);
6520
6521         case KEY_connect:
6522             LOP(OP_CONNECT,XTERM);
6523
6524         case KEY_chr:
6525             UNI(OP_CHR);
6526
6527         case KEY_cos:
6528             UNI(OP_COS);
6529
6530         case KEY_chroot:
6531             UNI(OP_CHROOT);
6532
6533         case KEY_default:
6534             PREBLOCK(DEFAULT);
6535
6536         case KEY_do:
6537             s = SKIPSPACE1(s);
6538             if (*s == '{')
6539                 PRETERMBLOCK(DO);
6540             if (*s != '\'')
6541                 s = force_word(s,WORD,TRUE,TRUE,FALSE);
6542             if (orig_keyword == KEY_do) {
6543                 orig_keyword = 0;
6544                 pl_yylval.ival = 1;
6545             }
6546             else
6547                 pl_yylval.ival = 0;
6548             OPERATOR(DO);
6549
6550         case KEY_die:
6551             PL_hints |= HINT_BLOCK_SCOPE;
6552             LOP(OP_DIE,XTERM);
6553
6554         case KEY_defined:
6555             UNI(OP_DEFINED);
6556
6557         case KEY_delete:
6558             UNI(OP_DELETE);
6559
6560         case KEY_dbmopen:
6561             gv_fetchpvs("AnyDBM_File::ISA", GV_ADDMULTI, SVt_PVAV);
6562             LOP(OP_DBMOPEN,XTERM);
6563
6564         case KEY_dbmclose:
6565             UNI(OP_DBMCLOSE);
6566
6567         case KEY_dump:
6568             s = force_word(s,WORD,TRUE,FALSE,FALSE);
6569             LOOPX(OP_DUMP);
6570
6571         case KEY_else:
6572             PREBLOCK(ELSE);
6573
6574         case KEY_elsif:
6575             pl_yylval.ival = CopLINE(PL_curcop);
6576             OPERATOR(ELSIF);
6577
6578         case KEY_eq:
6579             Eop(OP_SEQ);
6580
6581         case KEY_exists:
6582             UNI(OP_EXISTS);
6583
6584         case KEY_exit:
6585             if (PL_madskills)
6586                 UNI(OP_INT);
6587             UNI(OP_EXIT);
6588
6589         case KEY_eval:
6590             s = SKIPSPACE1(s);
6591             PL_expect = (*s == '{') ? XTERMBLOCK : XTERM;
6592             UNIBRACK(OP_ENTEREVAL);
6593
6594         case KEY_eof:
6595             UNI(OP_EOF);
6596
6597         case KEY_exp:
6598             UNI(OP_EXP);
6599
6600         case KEY_each:
6601             UNI(OP_EACH);
6602
6603         case KEY_exec:
6604             LOP(OP_EXEC,XREF);
6605
6606         case KEY_endhostent:
6607             FUN0(OP_EHOSTENT);
6608
6609         case KEY_endnetent:
6610             FUN0(OP_ENETENT);
6611
6612         case KEY_endservent:
6613             FUN0(OP_ESERVENT);
6614
6615         case KEY_endprotoent:
6616             FUN0(OP_EPROTOENT);
6617
6618         case KEY_endpwent:
6619             FUN0(OP_EPWENT);
6620
6621         case KEY_endgrent:
6622             FUN0(OP_EGRENT);
6623
6624         case KEY_for:
6625         case KEY_foreach:
6626             pl_yylval.ival = CopLINE(PL_curcop);
6627             s = SKIPSPACE1(s);
6628             if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
6629                 char *p = s;
6630 #ifdef PERL_MAD
6631                 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
6632 #endif
6633
6634                 if ((PL_bufend - p) >= 3 &&
6635                     strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
6636                     p += 2;
6637                 else if ((PL_bufend - p) >= 4 &&
6638                     strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
6639                     p += 3;
6640                 p = PEEKSPACE(p);
6641                 if (isIDFIRST_lazy_if(p,UTF)) {
6642                     p = scan_ident(p, PL_bufend,
6643                         PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
6644                     p = PEEKSPACE(p);
6645                 }
6646                 if (*p != '$')
6647                     Perl_croak(aTHX_ "Missing $ on loop variable");
6648 #ifdef PERL_MAD
6649                 s = SvPVX(PL_linestr) + soff;
6650 #endif
6651             }
6652             OPERATOR(FOR);
6653
6654         case KEY_formline:
6655             LOP(OP_FORMLINE,XTERM);
6656
6657         case KEY_fork:
6658             FUN0(OP_FORK);
6659
6660         case KEY_fcntl:
6661             LOP(OP_FCNTL,XTERM);
6662
6663         case KEY_fileno:
6664             UNI(OP_FILENO);
6665
6666         case KEY_flock:
6667             LOP(OP_FLOCK,XTERM);
6668
6669         case KEY_gt:
6670             Rop(OP_SGT);
6671
6672         case KEY_ge:
6673             Rop(OP_SGE);
6674
6675         case KEY_grep:
6676             LOP(OP_GREPSTART, XREF);
6677
6678         case KEY_goto:
6679             s = force_word(s,WORD,TRUE,FALSE,FALSE);
6680             LOOPX(OP_GOTO);
6681
6682         case KEY_gmtime:
6683             UNI(OP_GMTIME);
6684
6685         case KEY_getc:
6686             UNIDOR(OP_GETC);
6687
6688         case KEY_getppid:
6689             FUN0(OP_GETPPID);
6690
6691         case KEY_getpgrp:
6692             UNI(OP_GETPGRP);
6693
6694         case KEY_getpriority:
6695             LOP(OP_GETPRIORITY,XTERM);
6696
6697         case KEY_getprotobyname:
6698             UNI(OP_GPBYNAME);
6699
6700         case KEY_getprotobynumber:
6701             LOP(OP_GPBYNUMBER,XTERM);
6702
6703         case KEY_getprotoent:
6704             FUN0(OP_GPROTOENT);
6705
6706         case KEY_getpwent:
6707             FUN0(OP_GPWENT);
6708
6709         case KEY_getpwnam:
6710             UNI(OP_GPWNAM);
6711
6712         case KEY_getpwuid:
6713             UNI(OP_GPWUID);
6714
6715         case KEY_getpeername:
6716             UNI(OP_GETPEERNAME);
6717
6718         case KEY_gethostbyname:
6719             UNI(OP_GHBYNAME);
6720
6721         case KEY_gethostbyaddr:
6722             LOP(OP_GHBYADDR,XTERM);
6723
6724         case KEY_gethostent:
6725             FUN0(OP_GHOSTENT);
6726
6727         case KEY_getnetbyname:
6728             UNI(OP_GNBYNAME);
6729
6730         case KEY_getnetbyaddr:
6731             LOP(OP_GNBYADDR,XTERM);
6732
6733         case KEY_getnetent:
6734             FUN0(OP_GNETENT);
6735
6736         case KEY_getservbyname:
6737             LOP(OP_GSBYNAME,XTERM);
6738
6739         case KEY_getservbyport:
6740             LOP(OP_GSBYPORT,XTERM);
6741
6742         case KEY_getservent:
6743             FUN0(OP_GSERVENT);
6744
6745         case KEY_getsockname:
6746             UNI(OP_GETSOCKNAME);
6747
6748         case KEY_getsockopt:
6749             LOP(OP_GSOCKOPT,XTERM);
6750
6751         case KEY_getgrent:
6752             FUN0(OP_GGRENT);
6753
6754         case KEY_getgrnam:
6755             UNI(OP_GGRNAM);
6756
6757         case KEY_getgrgid:
6758             UNI(OP_GGRGID);
6759
6760         case KEY_getlogin:
6761             FUN0(OP_GETLOGIN);
6762
6763         case KEY_given:
6764             pl_yylval.ival = CopLINE(PL_curcop);
6765             OPERATOR(GIVEN);
6766
6767         case KEY_glob:
6768             LOP(OP_GLOB,XTERM);
6769
6770         case KEY_hex:
6771             UNI(OP_HEX);
6772
6773         case KEY_if:
6774             pl_yylval.ival = CopLINE(PL_curcop);
6775             OPERATOR(IF);
6776
6777         case KEY_index:
6778             LOP(OP_INDEX,XTERM);
6779
6780         case KEY_int:
6781             UNI(OP_INT);
6782
6783         case KEY_ioctl:
6784             LOP(OP_IOCTL,XTERM);
6785
6786         case KEY_join:
6787             LOP(OP_JOIN,XTERM);
6788
6789         case KEY_keys:
6790             UNI(OP_KEYS);
6791
6792         case KEY_kill:
6793             LOP(OP_KILL,XTERM);
6794
6795         case KEY_last:
6796             s = force_word(s,WORD,TRUE,FALSE,FALSE);
6797             LOOPX(OP_LAST);
6798
6799         case KEY_lc:
6800             UNI(OP_LC);
6801
6802         case KEY_lcfirst:
6803             UNI(OP_LCFIRST);
6804
6805         case KEY_local:
6806             pl_yylval.ival = 0;
6807             OPERATOR(LOCAL);
6808
6809         case KEY_length:
6810             UNI(OP_LENGTH);
6811
6812         case KEY_lt:
6813             Rop(OP_SLT);
6814
6815         case KEY_le:
6816             Rop(OP_SLE);
6817
6818         case KEY_localtime:
6819             UNI(OP_LOCALTIME);
6820
6821         case KEY_log:
6822             UNI(OP_LOG);
6823
6824         case KEY_link:
6825             LOP(OP_LINK,XTERM);
6826
6827         case KEY_listen:
6828             LOP(OP_LISTEN,XTERM);
6829
6830         case KEY_lock:
6831             UNI(OP_LOCK);
6832
6833         case KEY_lstat:
6834             UNI(OP_LSTAT);
6835
6836         case KEY_m:
6837             s = scan_pat(s,OP_MATCH);
6838             TERM(sublex_start());
6839
6840         case KEY_map:
6841             LOP(OP_MAPSTART, XREF);
6842
6843         case KEY_mkdir:
6844             LOP(OP_MKDIR,XTERM);
6845
6846         case KEY_msgctl:
6847             LOP(OP_MSGCTL,XTERM);
6848
6849         case KEY_msgget:
6850             LOP(OP_MSGGET,XTERM);
6851
6852         case KEY_msgrcv:
6853             LOP(OP_MSGRCV,XTERM);
6854
6855         case KEY_msgsnd:
6856             LOP(OP_MSGSND,XTERM);
6857
6858         case KEY_our:
6859         case KEY_my:
6860         case KEY_state:
6861             PL_in_my = (U16)tmp;
6862             s = SKIPSPACE1(s);
6863             if (isIDFIRST_lazy_if(s,UTF)) {
6864 #ifdef PERL_MAD
6865                 char* start = s;
6866 #endif
6867                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
6868                 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
6869                     goto really_sub;
6870                 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
6871                 if (!PL_in_my_stash) {
6872                     char tmpbuf[1024];
6873                     PL_bufptr = s;
6874                     my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
6875                     yyerror(tmpbuf);
6876                 }
6877 #ifdef PERL_MAD
6878                 if (PL_madskills) {     /* just add type to declarator token */
6879                     sv_catsv(PL_thistoken, PL_nextwhite);
6880                     PL_nextwhite = 0;
6881                     sv_catpvn(PL_thistoken, start, s - start);
6882                 }
6883 #endif
6884             }
6885             pl_yylval.ival = 1;
6886             OPERATOR(MY);
6887
6888         case KEY_next:
6889             s = force_word(s,WORD,TRUE,FALSE,FALSE);
6890             LOOPX(OP_NEXT);
6891
6892         case KEY_ne:
6893             Eop(OP_SNE);
6894
6895         case KEY_no:
6896             s = tokenize_use(0, s);
6897             OPERATOR(USE);
6898
6899         case KEY_not:
6900             if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
6901                 FUN1(OP_NOT);
6902             else
6903                 OPERATOR(NOTOP);
6904
6905         case KEY_open:
6906             s = SKIPSPACE1(s);
6907             if (isIDFIRST_lazy_if(s,UTF)) {
6908                 const char *t;
6909                 for (d = s; isALNUM_lazy_if(d,UTF);)
6910                     d++;
6911                 for (t=d; isSPACE(*t);)
6912                     t++;
6913                 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
6914                     /* [perl #16184] */
6915                     && !(t[0] == '=' && t[1] == '>')
6916                 ) {
6917                     int parms_len = (int)(d-s);
6918                     Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
6919                            "Precedence problem: open %.*s should be open(%.*s)",
6920                             parms_len, s, parms_len, s);
6921                 }
6922             }
6923             LOP(OP_OPEN,XTERM);
6924
6925         case KEY_or:
6926             pl_yylval.ival = OP_OR;
6927             OPERATOR(OROP);
6928
6929         case KEY_ord:
6930             UNI(OP_ORD);
6931
6932         case KEY_oct:
6933             UNI(OP_OCT);
6934
6935         case KEY_opendir:
6936             LOP(OP_OPEN_DIR,XTERM);
6937
6938         case KEY_print:
6939             checkcomma(s,PL_tokenbuf,"filehandle");
6940             LOP(OP_PRINT,XREF);
6941
6942         case KEY_printf:
6943             checkcomma(s,PL_tokenbuf,"filehandle");
6944             LOP(OP_PRTF,XREF);
6945
6946         case KEY_prototype:
6947             UNI(OP_PROTOTYPE);
6948
6949         case KEY_push:
6950             LOP(OP_PUSH,XTERM);
6951
6952         case KEY_pop:
6953             UNIDOR(OP_POP);
6954
6955         case KEY_pos:
6956             UNIDOR(OP_POS);
6957
6958         case KEY_pack:
6959             LOP(OP_PACK,XTERM);
6960
6961         case KEY_package:
6962             s = force_word(s,WORD,FALSE,TRUE,FALSE);
6963             s = force_version(s, FALSE);
6964             OPERATOR(PACKAGE);
6965
6966         case KEY_pipe:
6967             LOP(OP_PIPE_OP,XTERM);
6968
6969         case KEY_q:
6970             s = scan_str(s,!!PL_madskills,FALSE);
6971             if (!s)
6972                 missingterm(NULL);
6973             pl_yylval.ival = OP_CONST;
6974             TERM(sublex_start());
6975
6976         case KEY_quotemeta:
6977             UNI(OP_QUOTEMETA);
6978
6979         case KEY_qw:
6980             s = scan_str(s,!!PL_madskills,FALSE);
6981             if (!s)
6982                 missingterm(NULL);
6983             PL_expect = XOPERATOR;
6984             force_next(')');
6985             if (SvCUR(PL_lex_stuff)) {
6986                 OP *words = NULL;
6987                 int warned = 0;
6988                 d = SvPV_force(PL_lex_stuff, len);
6989                 while (len) {
6990                     for (; isSPACE(*d) && len; --len, ++d)
6991                         /**/;
6992                     if (len) {
6993                         SV *sv;
6994                         const char *b = d;
6995                         if (!warned && ckWARN(WARN_QW)) {
6996                             for (; !isSPACE(*d) && len; --len, ++d) {
6997                                 if (*d == ',') {
6998                                     Perl_warner(aTHX_ packWARN(WARN_QW),
6999                                         "Possible attempt to separate words with commas");
7000                                     ++warned;
7001                                 }
7002                                 else if (*d == '#') {
7003                                     Perl_warner(aTHX_ packWARN(WARN_QW),
7004                                         "Possible attempt to put comments in qw() list");
7005                                     ++warned;
7006                                 }
7007                             }
7008                         }
7009                         else {
7010                             for (; !isSPACE(*d) && len; --len, ++d)
7011                                 /**/;
7012                         }
7013                         sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
7014                         words = append_elem(OP_LIST, words,
7015                                             newSVOP(OP_CONST, 0, tokeq(sv)));
7016                     }
7017                 }
7018                 if (words) {
7019                     start_force(PL_curforce);
7020                     NEXTVAL_NEXTTOKE.opval = words;
7021                     force_next(THING);
7022                 }
7023             }
7024             if (PL_lex_stuff) {
7025                 SvREFCNT_dec(PL_lex_stuff);
7026                 PL_lex_stuff = NULL;
7027             }
7028             PL_expect = XTERM;
7029             TOKEN('(');
7030
7031         case KEY_qq:
7032             s = scan_str(s,!!PL_madskills,FALSE);
7033             if (!s)
7034                 missingterm(NULL);
7035             pl_yylval.ival = OP_STRINGIFY;
7036             if (SvIVX(PL_lex_stuff) == '\'')
7037                 SvIV_set(PL_lex_stuff, 0);      /* qq'$foo' should intepolate */
7038             TERM(sublex_start());
7039
7040         case KEY_qr:
7041             s = scan_pat(s,OP_QR);
7042             TERM(sublex_start());
7043
7044         case KEY_qx:
7045             s = scan_str(s,!!PL_madskills,FALSE);
7046             if (!s)
7047                 missingterm(NULL);
7048             readpipe_override();
7049             TERM(sublex_start());
7050
7051         case KEY_return:
7052             OLDLOP(OP_RETURN);
7053
7054         case KEY_require:
7055             s = SKIPSPACE1(s);
7056             if (isDIGIT(*s)) {
7057                 s = force_version(s, FALSE);
7058             }
7059             else if (*s != 'v' || !isDIGIT(s[1])
7060                     || (s = force_version(s, TRUE), *s == 'v'))
7061             {
7062                 *PL_tokenbuf = '\0';
7063                 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7064                 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
7065                     gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), GV_ADD);
7066                 else if (*s == '<')
7067                     yyerror("<> should be quotes");
7068             }
7069             if (orig_keyword == KEY_require) {
7070                 orig_keyword = 0;
7071                 pl_yylval.ival = 1;
7072             }
7073             else
7074                 pl_yylval.ival = 0;
7075             PL_expect = XTERM;
7076             PL_bufptr = s;
7077             PL_last_uni = PL_oldbufptr;
7078             PL_last_lop_op = OP_REQUIRE;
7079             s = skipspace(s);
7080             return REPORT( (int)REQUIRE );
7081
7082         case KEY_reset:
7083             UNI(OP_RESET);
7084
7085         case KEY_redo:
7086             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7087             LOOPX(OP_REDO);
7088
7089         case KEY_rename:
7090             LOP(OP_RENAME,XTERM);
7091
7092         case KEY_rand:
7093             UNI(OP_RAND);
7094
7095         case KEY_rmdir:
7096             UNI(OP_RMDIR);
7097
7098         case KEY_rindex:
7099             LOP(OP_RINDEX,XTERM);
7100
7101         case KEY_read:
7102             LOP(OP_READ,XTERM);
7103
7104         case KEY_readdir:
7105             UNI(OP_READDIR);
7106
7107         case KEY_readline:
7108             UNIDOR(OP_READLINE);
7109
7110         case KEY_readpipe:
7111             UNIDOR(OP_BACKTICK);
7112
7113         case KEY_rewinddir:
7114             UNI(OP_REWINDDIR);
7115
7116         case KEY_recv:
7117             LOP(OP_RECV,XTERM);
7118
7119         case KEY_reverse:
7120             LOP(OP_REVERSE,XTERM);
7121
7122         case KEY_readlink:
7123             UNIDOR(OP_READLINK);
7124
7125         case KEY_ref:
7126             UNI(OP_REF);
7127
7128         case KEY_s:
7129             s = scan_subst(s);
7130             if (pl_yylval.opval)
7131                 TERM(sublex_start());
7132             else
7133                 TOKEN(1);       /* force error */
7134
7135         case KEY_say:
7136             checkcomma(s,PL_tokenbuf,"filehandle");
7137             LOP(OP_SAY,XREF);
7138
7139         case KEY_chomp:
7140             UNI(OP_CHOMP);
7141
7142         case KEY_scalar:
7143             UNI(OP_SCALAR);
7144
7145         case KEY_select:
7146             LOP(OP_SELECT,XTERM);
7147
7148         case KEY_seek:
7149             LOP(OP_SEEK,XTERM);
7150
7151         case KEY_semctl:
7152             LOP(OP_SEMCTL,XTERM);
7153
7154         case KEY_semget:
7155             LOP(OP_SEMGET,XTERM);
7156
7157         case KEY_semop:
7158             LOP(OP_SEMOP,XTERM);
7159
7160         case KEY_send:
7161             LOP(OP_SEND,XTERM);
7162
7163         case KEY_setpgrp:
7164             LOP(OP_SETPGRP,XTERM);
7165
7166         case KEY_setpriority:
7167             LOP(OP_SETPRIORITY,XTERM);
7168
7169         case KEY_sethostent:
7170             UNI(OP_SHOSTENT);
7171
7172         case KEY_setnetent:
7173             UNI(OP_SNETENT);
7174
7175         case KEY_setservent:
7176             UNI(OP_SSERVENT);
7177
7178         case KEY_setprotoent:
7179             UNI(OP_SPROTOENT);
7180
7181         case KEY_setpwent:
7182             FUN0(OP_SPWENT);
7183
7184         case KEY_setgrent:
7185             FUN0(OP_SGRENT);
7186
7187         case KEY_seekdir:
7188             LOP(OP_SEEKDIR,XTERM);
7189
7190         case KEY_setsockopt:
7191             LOP(OP_SSOCKOPT,XTERM);
7192
7193         case KEY_shift:
7194             UNIDOR(OP_SHIFT);
7195
7196         case KEY_shmctl:
7197             LOP(OP_SHMCTL,XTERM);
7198
7199         case KEY_shmget:
7200             LOP(OP_SHMGET,XTERM);
7201
7202         case KEY_shmread:
7203             LOP(OP_SHMREAD,XTERM);
7204
7205         case KEY_shmwrite:
7206             LOP(OP_SHMWRITE,XTERM);
7207
7208         case KEY_shutdown:
7209             LOP(OP_SHUTDOWN,XTERM);
7210
7211         case KEY_sin:
7212             UNI(OP_SIN);
7213
7214         case KEY_sleep:
7215             UNI(OP_SLEEP);
7216
7217         case KEY_socket:
7218             LOP(OP_SOCKET,XTERM);
7219
7220         case KEY_socketpair:
7221             LOP(OP_SOCKPAIR,XTERM);
7222
7223         case KEY_sort:
7224             checkcomma(s,PL_tokenbuf,"subroutine name");
7225             s = SKIPSPACE1(s);
7226             if (*s == ';' || *s == ')')         /* probably a close */
7227                 Perl_croak(aTHX_ "sort is now a reserved word");
7228             PL_expect = XTERM;
7229             s = force_word(s,WORD,TRUE,TRUE,FALSE);
7230             LOP(OP_SORT,XREF);
7231
7232         case KEY_split:
7233             LOP(OP_SPLIT,XTERM);
7234
7235         case KEY_sprintf:
7236             LOP(OP_SPRINTF,XTERM);
7237
7238         case KEY_splice:
7239             LOP(OP_SPLICE,XTERM);
7240
7241         case KEY_sqrt:
7242             UNI(OP_SQRT);
7243
7244         case KEY_srand:
7245             UNI(OP_SRAND);
7246
7247         case KEY_stat:
7248             UNI(OP_STAT);
7249
7250         case KEY_study:
7251             UNI(OP_STUDY);
7252
7253         case KEY_substr:
7254             LOP(OP_SUBSTR,XTERM);
7255
7256         case KEY_format:
7257         case KEY_sub:
7258           really_sub:
7259             {
7260                 char tmpbuf[sizeof PL_tokenbuf];
7261                 SSize_t tboffset = 0;
7262                 expectation attrful;
7263                 bool have_name, have_proto;
7264                 const int key = tmp;
7265
7266 #ifdef PERL_MAD
7267                 SV *tmpwhite = 0;
7268
7269                 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7270                 SV *subtoken = newSVpvn(tstart, s - tstart);
7271                 PL_thistoken = 0;
7272
7273                 d = s;
7274                 s = SKIPSPACE2(s,tmpwhite);
7275 #else
7276                 s = skipspace(s);
7277 #endif
7278
7279                 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
7280                     (*s == ':' && s[1] == ':'))
7281                 {
7282 #ifdef PERL_MAD
7283                     SV *nametoke = NULL;
7284 #endif
7285
7286                     PL_expect = XBLOCK;
7287                     attrful = XATTRBLOCK;
7288                     /* remember buffer pos'n for later force_word */
7289                     tboffset = s - PL_oldbufptr;
7290                     d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
7291 #ifdef PERL_MAD
7292                     if (PL_madskills)
7293                         nametoke = newSVpvn(s, d - s);
7294 #endif
7295                     if (memchr(tmpbuf, ':', len))
7296                         sv_setpvn(PL_subname, tmpbuf, len);
7297                     else {
7298                         sv_setsv(PL_subname,PL_curstname);
7299                         sv_catpvs(PL_subname,"::");
7300                         sv_catpvn(PL_subname,tmpbuf,len);
7301                     }
7302                     have_name = TRUE;
7303
7304 #ifdef PERL_MAD
7305
7306                     start_force(0);
7307                     CURMAD('X', nametoke);
7308                     CURMAD('_', tmpwhite);
7309                     (void) force_word(PL_oldbufptr + tboffset, WORD,
7310                                       FALSE, TRUE, TRUE);
7311
7312                     s = SKIPSPACE2(d,tmpwhite);
7313 #else
7314                     s = skipspace(d);
7315 #endif
7316                 }
7317                 else {
7318                     if (key == KEY_my)
7319                         Perl_croak(aTHX_ "Missing name in \"my sub\"");
7320                     PL_expect = XTERMBLOCK;
7321                     attrful = XATTRTERM;
7322                     sv_setpvs(PL_subname,"?");
7323                     have_name = FALSE;
7324                 }
7325
7326                 if (key == KEY_format) {
7327                     if (*s == '=')
7328                         PL_lex_formbrack = PL_lex_brackets + 1;
7329 #ifdef PERL_MAD
7330                     PL_thistoken = subtoken;
7331                     s = d;
7332 #else
7333                     if (have_name)
7334                         (void) force_word(PL_oldbufptr + tboffset, WORD,
7335                                           FALSE, TRUE, TRUE);
7336 #endif
7337                     OPERATOR(FORMAT);
7338                 }
7339
7340                 /* Look for a prototype */
7341                 if (*s == '(') {
7342                     char *p;
7343                     bool bad_proto = FALSE;
7344                     bool in_brackets = FALSE;
7345                     char greedy_proto = ' ';
7346                     bool proto_after_greedy_proto = FALSE;
7347                     bool must_be_last = FALSE;
7348                     bool underscore = FALSE;
7349                     bool seen_underscore = FALSE;
7350                     const bool warnsyntax = ckWARN(WARN_SYNTAX);
7351
7352                     s = scan_str(s,!!PL_madskills,FALSE);
7353                     if (!s)
7354                         Perl_croak(aTHX_ "Prototype not terminated");
7355                     /* strip spaces and check for bad characters */
7356                     d = SvPVX(PL_lex_stuff);
7357                     tmp = 0;
7358                     for (p = d; *p; ++p) {
7359                         if (!isSPACE(*p)) {
7360                             d[tmp++] = *p;
7361
7362                             if (warnsyntax) {
7363                                 if (must_be_last)
7364                                     proto_after_greedy_proto = TRUE;
7365                                 if (!strchr("$@%*;[]&\\_", *p)) {
7366                                     bad_proto = TRUE;
7367                                 }
7368                                 else {
7369                                     if ( underscore ) {
7370                                         if ( *p != ';' )
7371                                             bad_proto = TRUE;
7372                                         underscore = FALSE;
7373                                     }
7374                                     if ( *p == '[' ) {
7375                                         in_brackets = TRUE;
7376                                     }
7377                                     else if ( *p == ']' ) {
7378                                         in_brackets = FALSE;
7379                                     }
7380                                     else if ( (*p == '@' || *p == '%') &&
7381                                          ( tmp < 2 || d[tmp-2] != '\\' ) &&
7382                                          !in_brackets ) {
7383                                         must_be_last = TRUE;
7384                                         greedy_proto = *p;
7385                                     }
7386                                     else if ( *p == '_' ) {
7387                                         underscore = seen_underscore = TRUE;
7388                                     }
7389                                 }
7390                             }
7391                         }
7392                     }
7393                     d[tmp] = '\0';
7394                     if (proto_after_greedy_proto)
7395                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
7396                                     "Prototype after '%c' for %"SVf" : %s",
7397                                     greedy_proto, SVfARG(PL_subname), d);
7398                     if (bad_proto)
7399                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
7400                                     "Illegal character %sin prototype for %"SVf" : %s",
7401                                     seen_underscore ? "after '_' " : "",
7402                                     SVfARG(PL_subname), d);
7403                     SvCUR_set(PL_lex_stuff, tmp);
7404                     have_proto = TRUE;
7405
7406 #ifdef PERL_MAD
7407                     start_force(0);
7408                     CURMAD('q', PL_thisopen);
7409                     CURMAD('_', tmpwhite);
7410                     CURMAD('=', PL_thisstuff);
7411                     CURMAD('Q', PL_thisclose);
7412                     NEXTVAL_NEXTTOKE.opval =
7413                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
7414                     PL_lex_stuff = NULL;
7415                     force_next(THING);
7416
7417                     s = SKIPSPACE2(s,tmpwhite);
7418 #else
7419                     s = skipspace(s);
7420 #endif
7421                 }
7422                 else
7423                     have_proto = FALSE;
7424
7425                 if (*s == ':' && s[1] != ':')
7426                     PL_expect = attrful;
7427                 else if (*s != '{' && key == KEY_sub) {
7428                     if (!have_name)
7429                         Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
7430                     else if (*s != ';')
7431                         Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
7432                 }
7433
7434 #ifdef PERL_MAD
7435                 start_force(0);
7436                 if (tmpwhite) {
7437                     if (PL_madskills)
7438                         curmad('^', newSVpvs(""));
7439                     CURMAD('_', tmpwhite);
7440                 }
7441                 force_next(0);
7442
7443                 PL_thistoken = subtoken;
7444 #else
7445                 if (have_proto) {
7446                     NEXTVAL_NEXTTOKE.opval =
7447                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
7448                     PL_lex_stuff = NULL;
7449                     force_next(THING);
7450                 }
7451 #endif
7452                 if (!have_name) {
7453                     if (PL_curstash)
7454                         sv_setpvs(PL_subname, "__ANON__");
7455                     else
7456                         sv_setpvs(PL_subname, "__ANON__::__ANON__");
7457                     TOKEN(ANONSUB);
7458                 }
7459 #ifndef PERL_MAD
7460                 (void) force_word(PL_oldbufptr + tboffset, WORD,
7461                                   FALSE, TRUE, TRUE);
7462 #endif
7463                 if (key == KEY_my)
7464                     TOKEN(MYSUB);
7465                 TOKEN(SUB);
7466             }
7467
7468         case KEY_system:
7469             LOP(OP_SYSTEM,XREF);
7470
7471         case KEY_symlink:
7472             LOP(OP_SYMLINK,XTERM);
7473
7474         case KEY_syscall:
7475             LOP(OP_SYSCALL,XTERM);
7476
7477         case KEY_sysopen:
7478             LOP(OP_SYSOPEN,XTERM);
7479
7480         case KEY_sysseek:
7481             LOP(OP_SYSSEEK,XTERM);
7482
7483         case KEY_sysread:
7484             LOP(OP_SYSREAD,XTERM);
7485
7486         case KEY_syswrite:
7487             LOP(OP_SYSWRITE,XTERM);
7488
7489         case KEY_tr:
7490             s = scan_trans(s);
7491             TERM(sublex_start());
7492
7493         case KEY_tell:
7494             UNI(OP_TELL);
7495
7496         case KEY_telldir:
7497             UNI(OP_TELLDIR);
7498
7499         case KEY_tie:
7500             LOP(OP_TIE,XTERM);
7501
7502         case KEY_tied:
7503             UNI(OP_TIED);
7504
7505         case KEY_time:
7506             FUN0(OP_TIME);
7507
7508         case KEY_times:
7509             FUN0(OP_TMS);
7510
7511         case KEY_truncate:
7512             LOP(OP_TRUNCATE,XTERM);
7513
7514         case KEY_uc:
7515             UNI(OP_UC);
7516
7517         case KEY_ucfirst:
7518             UNI(OP_UCFIRST);
7519
7520         case KEY_untie:
7521             UNI(OP_UNTIE);
7522
7523         case KEY_until:
7524             pl_yylval.ival = CopLINE(PL_curcop);
7525             OPERATOR(UNTIL);
7526
7527         case KEY_unless:
7528             pl_yylval.ival = CopLINE(PL_curcop);
7529             OPERATOR(UNLESS);
7530
7531         case KEY_unlink:
7532             LOP(OP_UNLINK,XTERM);
7533
7534         case KEY_undef:
7535             UNIDOR(OP_UNDEF);
7536
7537         case KEY_unpack:
7538             LOP(OP_UNPACK,XTERM);
7539
7540         case KEY_utime:
7541             LOP(OP_UTIME,XTERM);
7542
7543         case KEY_umask:
7544             UNIDOR(OP_UMASK);
7545
7546         case KEY_unshift:
7547             LOP(OP_UNSHIFT,XTERM);
7548
7549         case KEY_use:
7550             s = tokenize_use(1, s);
7551             OPERATOR(USE);
7552
7553         case KEY_values:
7554             UNI(OP_VALUES);
7555
7556         case KEY_vec:
7557             LOP(OP_VEC,XTERM);
7558
7559         case KEY_when:
7560             pl_yylval.ival = CopLINE(PL_curcop);
7561             OPERATOR(WHEN);
7562
7563         case KEY_while:
7564             pl_yylval.ival = CopLINE(PL_curcop);
7565             OPERATOR(WHILE);
7566
7567         case KEY_warn:
7568             PL_hints |= HINT_BLOCK_SCOPE;
7569             LOP(OP_WARN,XTERM);
7570
7571         case KEY_wait:
7572             FUN0(OP_WAIT);
7573
7574         case KEY_waitpid:
7575             LOP(OP_WAITPID,XTERM);
7576
7577         case KEY_wantarray:
7578             FUN0(OP_WANTARRAY);
7579
7580         case KEY_write:
7581 #ifdef EBCDIC
7582         {
7583             char ctl_l[2];
7584             ctl_l[0] = toCTRL('L');
7585             ctl_l[1] = '\0';
7586             gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
7587         }
7588 #else
7589             /* Make sure $^L is defined */
7590             gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
7591 #endif
7592             UNI(OP_ENTERWRITE);
7593
7594         case KEY_x:
7595             if (PL_expect == XOPERATOR)
7596                 Mop(OP_REPEAT);
7597             check_uni();
7598             goto just_a_word;
7599
7600         case KEY_xor:
7601             pl_yylval.ival = OP_XOR;
7602             OPERATOR(OROP);
7603
7604         case KEY_y:
7605             s = scan_trans(s);
7606             TERM(sublex_start());
7607         }
7608     }}
7609 }
7610 #ifdef __SC__
7611 #pragma segment Main
7612 #endif
7613
7614 static int
7615 S_pending_ident(pTHX)
7616 {
7617     dVAR;
7618     register char *d;
7619     PADOFFSET tmp = 0;
7620     /* pit holds the identifier we read and pending_ident is reset */
7621     char pit = PL_pending_ident;
7622     const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
7623     /* All routes through this function want to know if there is a colon.  */
7624     const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
7625     PL_pending_ident = 0;
7626
7627     /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
7628     DEBUG_T({ PerlIO_printf(Perl_debug_log,
7629           "### Pending identifier '%s'\n", PL_tokenbuf); });
7630
7631     /* if we're in a my(), we can't allow dynamics here.
7632        $foo'bar has already been turned into $foo::bar, so
7633        just check for colons.
7634
7635        if it's a legal name, the OP is a PADANY.
7636     */
7637     if (PL_in_my) {
7638         if (PL_in_my == KEY_our) {      /* "our" is merely analogous to "my" */
7639             if (has_colon)
7640                 yyerror(Perl_form(aTHX_ "No package name allowed for "
7641                                   "variable %s in \"our\"",
7642                                   PL_tokenbuf));
7643             tmp = allocmy(PL_tokenbuf, tokenbuf_len, 0);
7644         }
7645         else {
7646             if (has_colon)
7647                 yyerror(Perl_form(aTHX_ PL_no_myglob,
7648                             PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf));
7649
7650             pl_yylval.opval = newOP(OP_PADANY, 0);
7651             pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len, 0);
7652             return PRIVATEREF;
7653         }
7654     }
7655
7656     /*
7657        build the ops for accesses to a my() variable.
7658
7659        Deny my($a) or my($b) in a sort block, *if* $a or $b is
7660        then used in a comparison.  This catches most, but not
7661        all cases.  For instance, it catches
7662            sort { my($a); $a <=> $b }
7663        but not
7664            sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
7665        (although why you'd do that is anyone's guess).
7666     */
7667
7668     if (!has_colon) {
7669         if (!PL_in_my)
7670             tmp = pad_findmy(PL_tokenbuf, tokenbuf_len, 0);
7671         if (tmp != NOT_IN_PAD) {
7672             /* might be an "our" variable" */
7673             if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
7674                 /* build ops for a bareword */
7675                 HV *  const stash = PAD_COMPNAME_OURSTASH(tmp);
7676                 HEK * const stashname = HvNAME_HEK(stash);
7677                 SV *  const sym = newSVhek(stashname);
7678                 sv_catpvs(sym, "::");
7679                 sv_catpvn(sym, PL_tokenbuf+1, tokenbuf_len - 1);
7680                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
7681                 pl_yylval.opval->op_private = OPpCONST_ENTERED;
7682                 gv_fetchsv(sym,
7683                     (PL_in_eval
7684                         ? (GV_ADDMULTI | GV_ADDINEVAL)
7685                         : GV_ADDMULTI
7686                     ),
7687                     ((PL_tokenbuf[0] == '$') ? SVt_PV
7688                      : (PL_tokenbuf[0] == '@') ? SVt_PVAV
7689                      : SVt_PVHV));
7690                 return WORD;
7691             }
7692
7693             /* if it's a sort block and they're naming $a or $b */
7694             if (PL_last_lop_op == OP_SORT &&
7695                 PL_tokenbuf[0] == '$' &&
7696                 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
7697                 && !PL_tokenbuf[2])
7698             {
7699                 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
7700                      d < PL_bufend && *d != '\n';
7701                      d++)
7702                 {
7703                     if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
7704                         Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
7705                               PL_tokenbuf);
7706                     }
7707                 }
7708             }
7709
7710             pl_yylval.opval = newOP(OP_PADANY, 0);
7711             pl_yylval.opval->op_targ = tmp;
7712             return PRIVATEREF;
7713         }
7714     }
7715
7716     /*
7717        Whine if they've said @foo in a doublequoted string,
7718        and @foo isn't a variable we can find in the symbol
7719        table.
7720     */
7721     if (ckWARN(WARN_AMBIGUOUS) &&
7722         pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
7723         GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1, 0,
7724                                          SVt_PVAV);
7725         if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
7726                 /* DO NOT warn for @- and @+ */
7727                 && !( PL_tokenbuf[2] == '\0' &&
7728                     ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
7729            )
7730         {
7731             /* Downgraded from fatal to warning 20000522 mjd */
7732             Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
7733                         "Possible unintended interpolation of %s in string",
7734                         PL_tokenbuf);
7735         }
7736     }
7737
7738     /* build ops for a bareword */
7739     pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn(PL_tokenbuf + 1,
7740                                                       tokenbuf_len - 1));
7741     pl_yylval.opval->op_private = OPpCONST_ENTERED;
7742     gv_fetchpvn_flags(
7743             PL_tokenbuf + 1, tokenbuf_len - 1,
7744             /* If the identifier refers to a stash, don't autovivify it.
7745              * Change 24660 had the side effect of causing symbol table
7746              * hashes to always be defined, even if they were freshly
7747              * created and the only reference in the entire program was
7748              * the single statement with the defined %foo::bar:: test.
7749              * It appears that all code in the wild doing this actually
7750              * wants to know whether sub-packages have been loaded, so
7751              * by avoiding auto-vivifying symbol tables, we ensure that
7752              * defined %foo::bar:: continues to be false, and the existing
7753              * tests still give the expected answers, even though what
7754              * they're actually testing has now changed subtly.
7755              */
7756             (*PL_tokenbuf == '%'
7757              && *(d = PL_tokenbuf + tokenbuf_len - 1) == ':'
7758              && d[-1] == ':'
7759              ? 0
7760              : PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD),
7761             ((PL_tokenbuf[0] == '$') ? SVt_PV
7762              : (PL_tokenbuf[0] == '@') ? SVt_PVAV
7763              : SVt_PVHV));
7764     return WORD;
7765 }
7766
7767 /*
7768  *  The following code was generated by perl_keyword.pl.
7769  */
7770
7771 I32
7772 Perl_keyword (pTHX_ const char *name, I32 len, bool all_keywords)
7773 {
7774     dVAR;
7775
7776     PERL_ARGS_ASSERT_KEYWORD;
7777
7778   switch (len)
7779   {
7780     case 1: /* 5 tokens of length 1 */
7781       switch (name[0])
7782       {
7783         case 'm':
7784           {                                       /* m          */
7785             return KEY_m;
7786           }
7787
7788         case 'q':
7789           {                                       /* q          */
7790             return KEY_q;
7791           }
7792
7793         case 's':
7794           {                                       /* s          */
7795             return KEY_s;
7796           }
7797
7798         case 'x':
7799           {                                       /* x          */
7800             return -KEY_x;
7801           }
7802
7803         case 'y':
7804           {                                       /* y          */
7805             return KEY_y;
7806           }
7807
7808         default:
7809           goto unknown;
7810       }
7811
7812     case 2: /* 18 tokens of length 2 */
7813       switch (name[0])
7814       {
7815         case 'd':
7816           if (name[1] == 'o')
7817           {                                       /* do         */
7818             return KEY_do;
7819           }
7820
7821           goto unknown;
7822
7823         case 'e':
7824           if (name[1] == 'q')
7825           {                                       /* eq         */
7826             return -KEY_eq;
7827           }
7828
7829           goto unknown;
7830
7831         case 'g':
7832           switch (name[1])
7833           {
7834             case 'e':
7835               {                                   /* ge         */
7836                 return -KEY_ge;
7837               }
7838
7839             case 't':
7840               {                                   /* gt         */
7841                 return -KEY_gt;
7842               }
7843
7844             default:
7845               goto unknown;
7846           }
7847
7848         case 'i':
7849           if (name[1] == 'f')
7850           {                                       /* if         */
7851             return KEY_if;
7852           }
7853
7854           goto unknown;
7855
7856         case 'l':
7857           switch (name[1])
7858           {
7859             case 'c':
7860               {                                   /* lc         */
7861                 return -KEY_lc;
7862               }
7863
7864             case 'e':
7865               {                                   /* le         */
7866                 return -KEY_le;
7867               }
7868
7869             case 't':
7870               {                                   /* lt         */
7871                 return -KEY_lt;
7872               }
7873
7874             default:
7875               goto unknown;
7876           }
7877
7878         case 'm':
7879           if (name[1] == 'y')
7880           {                                       /* my         */
7881             return KEY_my;
7882           }
7883
7884           goto unknown;
7885
7886         case 'n':
7887           switch (name[1])
7888           {
7889             case 'e':
7890               {                                   /* ne         */
7891                 return -KEY_ne;
7892               }
7893
7894             case 'o':
7895               {                                   /* no         */
7896                 return KEY_no;
7897               }
7898
7899             default:
7900               goto unknown;
7901           }
7902
7903         case 'o':
7904           if (name[1] == 'r')
7905           {                                       /* or         */
7906             return -KEY_or;
7907           }
7908
7909           goto unknown;
7910
7911         case 'q':
7912           switch (name[1])
7913           {
7914             case 'q':
7915               {                                   /* qq         */
7916                 return KEY_qq;
7917               }
7918
7919             case 'r':
7920               {                                   /* qr         */
7921                 return KEY_qr;
7922               }
7923
7924             case 'w':
7925               {                                   /* qw         */
7926                 return KEY_qw;
7927               }
7928
7929             case 'x':
7930               {                                   /* qx         */
7931                 return KEY_qx;
7932               }
7933
7934             default:
7935               goto unknown;
7936           }
7937
7938         case 't':
7939           if (name[1] == 'r')
7940           {                                       /* tr         */
7941             return KEY_tr;
7942           }
7943
7944           goto unknown;
7945
7946         case 'u':
7947           if (name[1] == 'c')
7948           {                                       /* uc         */
7949             return -KEY_uc;
7950           }
7951
7952           goto unknown;
7953
7954         default:
7955           goto unknown;
7956       }
7957
7958     case 3: /* 29 tokens of length 3 */
7959       switch (name[0])
7960       {
7961         case 'E':
7962           if (name[1] == 'N' &&
7963               name[2] == 'D')
7964           {                                       /* END        */
7965             return KEY_END;
7966           }
7967
7968           goto unknown;
7969
7970         case 'a':
7971           switch (name[1])
7972           {
7973             case 'b':
7974               if (name[2] == 's')
7975               {                                   /* abs        */
7976                 return -KEY_abs;
7977               }
7978
7979               goto unknown;
7980
7981             case 'n':
7982               if (name[2] == 'd')
7983               {                                   /* and        */
7984                 return -KEY_and;
7985               }
7986
7987               goto unknown;
7988
7989             default:
7990               goto unknown;
7991           }
7992
7993         case 'c':
7994           switch (name[1])
7995           {
7996             case 'h':
7997               if (name[2] == 'r')
7998               {                                   /* chr        */
7999                 return -KEY_chr;
8000               }
8001
8002               goto unknown;
8003
8004             case 'm':
8005               if (name[2] == 'p')
8006               {                                   /* cmp        */
8007                 return -KEY_cmp;
8008               }
8009
8010               goto unknown;
8011
8012             case 'o':
8013               if (name[2] == 's')
8014               {                                   /* cos        */
8015                 return -KEY_cos;
8016               }
8017
8018               goto unknown;
8019
8020             default:
8021               goto unknown;
8022           }
8023
8024         case 'd':
8025           if (name[1] == 'i' &&
8026               name[2] == 'e')
8027           {                                       /* die        */
8028             return -KEY_die;
8029           }
8030
8031           goto unknown;
8032
8033         case 'e':
8034           switch (name[1])
8035           {
8036             case 'o':
8037               if (name[2] == 'f')
8038               {                                   /* eof        */
8039                 return -KEY_eof;
8040               }
8041
8042               goto unknown;
8043
8044             case 'x':
8045               if (name[2] == 'p')
8046               {                                   /* exp        */
8047                 return -KEY_exp;
8048               }
8049
8050               goto unknown;
8051
8052             default:
8053               goto unknown;
8054           }
8055
8056         case 'f':
8057           if (name[1] == 'o' &&
8058               name[2] == 'r')
8059           {                                       /* for        */
8060             return KEY_for;
8061           }
8062
8063           goto unknown;
8064
8065         case 'h':
8066           if (name[1] == 'e' &&
8067               name[2] == 'x')
8068           {                                       /* hex        */
8069             return -KEY_hex;
8070           }
8071
8072           goto unknown;
8073
8074         case 'i':
8075           if (name[1] == 'n' &&
8076               name[2] == 't')
8077           {                                       /* int        */
8078             return -KEY_int;
8079           }
8080
8081           goto unknown;
8082
8083         case 'l':
8084           if (name[1] == 'o' &&
8085               name[2] == 'g')
8086           {                                       /* log        */
8087             return -KEY_log;
8088           }
8089
8090           goto unknown;
8091
8092         case 'm':
8093           if (name[1] == 'a' &&
8094               name[2] == 'p')
8095           {                                       /* map        */
8096             return KEY_map;
8097           }
8098
8099           goto unknown;
8100
8101         case 'n':
8102           if (name[1] == 'o' &&
8103               name[2] == 't')
8104           {                                       /* not        */
8105             return -KEY_not;
8106           }
8107
8108           goto unknown;
8109
8110         case 'o':
8111           switch (name[1])
8112           {
8113             case 'c':
8114               if (name[2] == 't')
8115               {                                   /* oct        */
8116                 return -KEY_oct;
8117               }
8118
8119               goto unknown;
8120
8121             case 'r':
8122               if (name[2] == 'd')
8123               {                                   /* ord        */
8124                 return -KEY_ord;
8125               }
8126
8127               goto unknown;
8128
8129             case 'u':
8130               if (name[2] == 'r')
8131               {                                   /* our        */
8132                 return KEY_our;
8133               }
8134
8135               goto unknown;
8136
8137             default:
8138               goto unknown;
8139           }
8140
8141         case 'p':
8142           if (name[1] == 'o')
8143           {
8144             switch (name[2])
8145             {
8146               case 'p':
8147                 {                                 /* pop        */
8148                   return -KEY_pop;
8149                 }
8150
8151               case 's':
8152                 {                                 /* pos        */
8153                   return KEY_pos;
8154                 }
8155
8156               default:
8157                 goto unknown;
8158             }
8159           }
8160
8161           goto unknown;
8162
8163         case 'r':
8164           if (name[1] == 'e' &&
8165               name[2] == 'f')
8166           {                                       /* ref        */
8167             return -KEY_ref;
8168           }
8169
8170           goto unknown;
8171
8172         case 's':
8173           switch (name[1])
8174           {
8175             case 'a':
8176               if (name[2] == 'y')
8177               {                                   /* say        */
8178                 return (all_keywords || FEATURE_IS_ENABLED("say") ? KEY_say : 0);
8179               }
8180
8181               goto unknown;
8182
8183             case 'i':
8184               if (name[2] == 'n')
8185               {                                   /* sin        */
8186                 return -KEY_sin;
8187               }
8188
8189               goto unknown;
8190
8191             case 'u':
8192               if (name[2] == 'b')
8193               {                                   /* sub        */
8194                 return KEY_sub;
8195               }
8196
8197               goto unknown;
8198
8199             default:
8200               goto unknown;
8201           }
8202
8203         case 't':
8204           if (name[1] == 'i' &&
8205               name[2] == 'e')
8206           {                                       /* tie        */
8207             return KEY_tie;
8208           }
8209
8210           goto unknown;
8211
8212         case 'u':
8213           if (name[1] == 's' &&
8214               name[2] == 'e')
8215           {                                       /* use        */
8216             return KEY_use;
8217           }
8218
8219           goto unknown;
8220
8221         case 'v':
8222           if (name[1] == 'e' &&
8223               name[2] == 'c')
8224           {                                       /* vec        */
8225             return -KEY_vec;
8226           }
8227
8228           goto unknown;
8229
8230         case 'x':
8231           if (name[1] == 'o' &&
8232               name[2] == 'r')
8233           {                                       /* xor        */
8234             return -KEY_xor;
8235           }
8236
8237           goto unknown;
8238
8239         default:
8240           goto unknown;
8241       }
8242
8243     case 4: /* 41 tokens of length 4 */
8244       switch (name[0])
8245       {
8246         case 'C':
8247           if (name[1] == 'O' &&
8248               name[2] == 'R' &&
8249               name[3] == 'E')
8250           {                                       /* CORE       */
8251             return -KEY_CORE;
8252           }
8253
8254           goto unknown;
8255
8256         case 'I':
8257           if (name[1] == 'N' &&
8258               name[2] == 'I' &&
8259               name[3] == 'T')
8260           {                                       /* INIT       */
8261             return KEY_INIT;
8262           }
8263
8264           goto unknown;
8265
8266         case 'b':
8267           if (name[1] == 'i' &&
8268               name[2] == 'n' &&
8269               name[3] == 'd')
8270           {                                       /* bind       */
8271             return -KEY_bind;
8272           }
8273
8274           goto unknown;
8275
8276         case 'c':
8277           if (name[1] == 'h' &&
8278               name[2] == 'o' &&
8279               name[3] == 'p')
8280           {                                       /* chop       */
8281             return -KEY_chop;
8282           }
8283
8284           goto unknown;
8285
8286         case 'd':
8287           if (name[1] == 'u' &&
8288               name[2] == 'm' &&
8289               name[3] == 'p')
8290           {                                       /* dump       */
8291             return -KEY_dump;
8292           }
8293
8294           goto unknown;
8295
8296         case 'e':
8297           switch (name[1])
8298           {
8299             case 'a':
8300               if (name[2] == 'c' &&
8301                   name[3] == 'h')
8302               {                                   /* each       */
8303                 return -KEY_each;
8304               }
8305
8306               goto unknown;
8307
8308             case 'l':
8309               if (name[2] == 's' &&
8310                   name[3] == 'e')
8311               {                                   /* else       */
8312                 return KEY_else;
8313               }
8314
8315               goto unknown;
8316
8317             case 'v':
8318               if (name[2] == 'a' &&
8319                   name[3] == 'l')
8320               {                                   /* eval       */
8321                 return KEY_eval;
8322               }
8323
8324               goto unknown;
8325
8326             case 'x':
8327               switch (name[2])
8328               {
8329                 case 'e':
8330                   if (name[3] == 'c')
8331                   {                               /* exec       */
8332                     return -KEY_exec;
8333                   }
8334
8335                   goto unknown;
8336
8337                 case 'i':
8338                   if (name[3] == 't')
8339                   {                               /* exit       */
8340                     return -KEY_exit;
8341                   }
8342
8343                   goto unknown;
8344
8345                 default:
8346                   goto unknown;
8347               }
8348
8349             default:
8350               goto unknown;
8351           }
8352
8353         case 'f':
8354           if (name[1] == 'o' &&
8355               name[2] == 'r' &&
8356               name[3] == 'k')
8357           {                                       /* fork       */
8358             return -KEY_fork;
8359           }
8360
8361           goto unknown;
8362
8363         case 'g':
8364           switch (name[1])
8365           {
8366             case 'e':
8367               if (name[2] == 't' &&
8368                   name[3] == 'c')
8369               {                                   /* getc       */
8370                 return -KEY_getc;
8371               }
8372
8373               goto unknown;
8374
8375             case 'l':
8376               if (name[2] == 'o' &&
8377                   name[3] == 'b')
8378               {                                   /* glob       */
8379                 return KEY_glob;
8380               }
8381
8382               goto unknown;
8383
8384             case 'o':
8385               if (name[2] == 't' &&
8386                   name[3] == 'o')
8387               {                                   /* goto       */
8388                 return KEY_goto;
8389               }
8390
8391               goto unknown;
8392
8393             case 'r':
8394               if (name[2] == 'e' &&
8395                   name[3] == 'p')
8396               {                                   /* grep       */
8397                 return KEY_grep;
8398               }
8399
8400               goto unknown;
8401
8402             default:
8403               goto unknown;
8404           }
8405
8406         case 'j':
8407           if (name[1] == 'o' &&
8408               name[2] == 'i' &&
8409               name[3] == 'n')
8410           {                                       /* join       */
8411             return -KEY_join;
8412           }
8413
8414           goto unknown;
8415
8416         case 'k':
8417           switch (name[1])
8418           {
8419             case 'e':
8420               if (name[2] == 'y' &&
8421                   name[3] == 's')
8422               {                                   /* keys       */
8423                 return -KEY_keys;
8424               }
8425
8426               goto unknown;
8427
8428             case 'i':
8429               if (name[2] == 'l' &&
8430                   name[3] == 'l')
8431               {                                   /* kill       */
8432                 return -KEY_kill;
8433               }
8434
8435               goto unknown;
8436
8437             default:
8438               goto unknown;
8439           }
8440
8441         case 'l':
8442           switch (name[1])
8443           {
8444             case 'a':
8445               if (name[2] == 's' &&
8446                   name[3] == 't')
8447               {                                   /* last       */
8448                 return KEY_last;
8449               }
8450
8451               goto unknown;
8452
8453             case 'i':
8454               if (name[2] == 'n' &&
8455                   name[3] == 'k')
8456               {                                   /* link       */
8457                 return -KEY_link;
8458               }
8459
8460               goto unknown;
8461
8462             case 'o':
8463               if (name[2] == 'c' &&
8464                   name[3] == 'k')
8465               {                                   /* lock       */
8466                 return -KEY_lock;
8467               }
8468
8469               goto unknown;
8470
8471             default:
8472               goto unknown;
8473           }
8474
8475         case 'n':
8476           if (name[1] == 'e' &&
8477               name[2] == 'x' &&
8478               name[3] == 't')
8479           {                                       /* next       */
8480             return KEY_next;
8481           }
8482
8483           goto unknown;
8484
8485         case 'o':
8486           if (name[1] == 'p' &&
8487               name[2] == 'e' &&
8488               name[3] == 'n')
8489           {                                       /* open       */
8490             return -KEY_open;
8491           }
8492
8493           goto unknown;
8494
8495         case 'p':
8496           switch (name[1])
8497           {
8498             case 'a':
8499               if (name[2] == 'c' &&
8500                   name[3] == 'k')
8501               {                                   /* pack       */
8502                 return -KEY_pack;
8503               }
8504
8505               goto unknown;
8506
8507             case 'i':
8508               if (name[2] == 'p' &&
8509                   name[3] == 'e')
8510               {                                   /* pipe       */
8511                 return -KEY_pipe;
8512               }
8513
8514               goto unknown;
8515
8516             case 'u':
8517               if (name[2] == 's' &&
8518                   name[3] == 'h')
8519               {                                   /* push       */
8520                 return -KEY_push;
8521               }
8522
8523               goto unknown;
8524
8525             default:
8526               goto unknown;
8527           }
8528
8529         case 'r':
8530           switch (name[1])
8531           {
8532             case 'a':
8533               if (name[2] == 'n' &&
8534                   name[3] == 'd')
8535               {                                   /* rand       */
8536                 return -KEY_rand;
8537               }
8538
8539               goto unknown;
8540
8541             case 'e':
8542               switch (name[2])
8543               {
8544                 case 'a':
8545                   if (name[3] == 'd')
8546                   {                               /* read       */
8547                     return -KEY_read;
8548                   }
8549
8550                   goto unknown;
8551
8552                 case 'c':
8553                   if (name[3] == 'v')
8554                   {                               /* recv       */
8555                     return -KEY_recv;
8556                   }
8557
8558                   goto unknown;
8559
8560                 case 'd':
8561                   if (name[3] == 'o')
8562                   {                               /* redo       */
8563                     return KEY_redo;
8564                   }
8565
8566                   goto unknown;
8567
8568                 default:
8569                   goto unknown;
8570               }
8571
8572             default:
8573               goto unknown;
8574           }
8575
8576         case 's':
8577           switch (name[1])
8578           {
8579             case 'e':
8580               switch (name[2])
8581               {
8582                 case 'e':
8583                   if (name[3] == 'k')
8584                   {                               /* seek       */
8585                     return -KEY_seek;
8586                   }
8587
8588                   goto unknown;
8589
8590                 case 'n':
8591                   if (name[3] == 'd')
8592                   {                               /* send       */
8593                     return -KEY_send;
8594                   }
8595
8596                   goto unknown;
8597
8598                 default:
8599                   goto unknown;
8600               }
8601
8602             case 'o':
8603               if (name[2] == 'r' &&
8604                   name[3] == 't')
8605               {                                   /* sort       */
8606                 return KEY_sort;
8607               }
8608
8609               goto unknown;
8610
8611             case 'q':
8612               if (name[2] == 'r' &&
8613                   name[3] == 't')
8614               {                                   /* sqrt       */
8615                 return -KEY_sqrt;
8616               }
8617
8618               goto unknown;
8619
8620             case 't':
8621               if (name[2] == 'a' &&
8622                   name[3] == 't')
8623               {                                   /* stat       */
8624                 return -KEY_stat;
8625               }
8626
8627               goto unknown;
8628
8629             default:
8630               goto unknown;
8631           }
8632
8633         case 't':
8634           switch (name[1])
8635           {
8636             case 'e':
8637               if (name[2] == 'l' &&
8638                   name[3] == 'l')
8639               {                                   /* tell       */
8640                 return -KEY_tell;
8641               }
8642
8643               goto unknown;
8644
8645             case 'i':
8646               switch (name[2])
8647               {
8648                 case 'e':
8649                   if (name[3] == 'd')
8650                   {                               /* tied       */
8651                     return KEY_tied;
8652                   }
8653
8654                   goto unknown;
8655
8656                 case 'm':
8657                   if (name[3] == 'e')
8658                   {                               /* time       */
8659                     return -KEY_time;
8660                   }
8661
8662                   goto unknown;
8663
8664                 default:
8665                   goto unknown;
8666               }
8667
8668             default:
8669               goto unknown;
8670           }
8671
8672         case 'w':
8673           switch (name[1])
8674           {
8675             case 'a':
8676               switch (name[2])
8677               {
8678                 case 'i':
8679                   if (name[3] == 't')
8680                   {                               /* wait       */
8681                     return -KEY_wait;
8682                   }
8683
8684                   goto unknown;
8685
8686                 case 'r':
8687                   if (name[3] == 'n')
8688                   {                               /* warn       */
8689                     return -KEY_warn;
8690                   }
8691
8692                   goto unknown;
8693
8694                 default:
8695                   goto unknown;
8696               }
8697
8698             case 'h':
8699               if (name[2] == 'e' &&
8700                   name[3] == 'n')
8701               {                                   /* when       */
8702                 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_when : 0);
8703               }
8704
8705               goto unknown;
8706
8707             default:
8708               goto unknown;
8709           }
8710
8711         default:
8712           goto unknown;
8713       }
8714
8715     case 5: /* 39 tokens of length 5 */
8716       switch (name[0])
8717       {
8718         case 'B':
8719           if (name[1] == 'E' &&
8720               name[2] == 'G' &&
8721               name[3] == 'I' &&
8722               name[4] == 'N')
8723           {                                       /* BEGIN      */
8724             return KEY_BEGIN;
8725           }
8726
8727           goto unknown;
8728
8729         case 'C':
8730           if (name[1] == 'H' &&
8731               name[2] == 'E' &&
8732               name[3] == 'C' &&
8733               name[4] == 'K')
8734           {                                       /* CHECK      */
8735             return KEY_CHECK;
8736           }
8737
8738           goto unknown;
8739
8740         case 'a':
8741           switch (name[1])
8742           {
8743             case 'l':
8744               if (name[2] == 'a' &&
8745                   name[3] == 'r' &&
8746                   name[4] == 'm')
8747               {                                   /* alarm      */
8748                 return -KEY_alarm;
8749               }
8750
8751               goto unknown;
8752
8753             case 't':
8754               if (name[2] == 'a' &&
8755                   name[3] == 'n' &&
8756                   name[4] == '2')
8757               {                                   /* atan2      */
8758                 return -KEY_atan2;
8759               }
8760
8761               goto unknown;
8762
8763             default:
8764               goto unknown;
8765           }
8766
8767         case 'b':
8768           switch (name[1])
8769           {
8770             case 'l':
8771               if (name[2] == 'e' &&
8772                   name[3] == 's' &&
8773                   name[4] == 's')
8774               {                                   /* bless      */
8775                 return -KEY_bless;
8776               }
8777
8778               goto unknown;
8779
8780             case 'r':
8781               if (name[2] == 'e' &&
8782                   name[3] == 'a' &&
8783                   name[4] == 'k')
8784               {                                   /* break      */
8785                 return (all_keywords || FEATURE_IS_ENABLED("switch") ? -KEY_break : 0);
8786               }
8787
8788               goto unknown;
8789
8790             default:
8791               goto unknown;
8792           }
8793
8794         case 'c':
8795           switch (name[1])
8796           {
8797             case 'h':
8798               switch (name[2])
8799               {
8800                 case 'd':
8801                   if (name[3] == 'i' &&
8802                       name[4] == 'r')
8803                   {                               /* chdir      */
8804                     return -KEY_chdir;
8805                   }
8806
8807                   goto unknown;
8808
8809                 case 'm':
8810                   if (name[3] == 'o' &&
8811                       name[4] == 'd')
8812                   {                               /* chmod      */
8813                     return -KEY_chmod;
8814                   }
8815
8816                   goto unknown;
8817
8818                 case 'o':
8819                   switch (name[3])
8820                   {
8821                     case 'm':
8822                       if (name[4] == 'p')
8823                       {                           /* chomp      */
8824                         return -KEY_chomp;
8825                       }
8826
8827                       goto unknown;
8828
8829                     case 'w':
8830                       if (name[4] == 'n')
8831                       {                           /* chown      */
8832                         return -KEY_chown;
8833                       }
8834
8835                       goto unknown;
8836
8837                     default:
8838                       goto unknown;
8839                   }
8840
8841                 default:
8842                   goto unknown;
8843               }
8844
8845             case 'l':
8846               if (name[2] == 'o' &&
8847                   name[3] == 's' &&
8848                   name[4] == 'e')
8849               {                                   /* close      */
8850                 return -KEY_close;
8851               }
8852
8853               goto unknown;
8854
8855             case 'r':
8856               if (name[2] == 'y' &&
8857                   name[3] == 'p' &&
8858                   name[4] == 't')
8859               {                                   /* crypt      */
8860                 return -KEY_crypt;
8861               }
8862
8863               goto unknown;
8864
8865             default:
8866               goto unknown;
8867           }
8868
8869         case 'e':
8870           if (name[1] == 'l' &&
8871               name[2] == 's' &&
8872               name[3] == 'i' &&
8873               name[4] == 'f')
8874           {                                       /* elsif      */
8875             return KEY_elsif;
8876           }
8877
8878           goto unknown;
8879
8880         case 'f':
8881           switch (name[1])
8882           {
8883             case 'c':
8884               if (name[2] == 'n' &&
8885                   name[3] == 't' &&
8886                   name[4] == 'l')
8887               {                                   /* fcntl      */
8888                 return -KEY_fcntl;
8889               }
8890
8891               goto unknown;
8892
8893             case 'l':
8894               if (name[2] == 'o' &&
8895                   name[3] == 'c' &&
8896                   name[4] == 'k')
8897               {                                   /* flock      */
8898                 return -KEY_flock;
8899               }
8900
8901               goto unknown;
8902
8903             default:
8904               goto unknown;
8905           }
8906
8907         case 'g':
8908           if (name[1] == 'i' &&
8909               name[2] == 'v' &&
8910               name[3] == 'e' &&
8911               name[4] == 'n')
8912           {                                       /* given      */
8913             return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_given : 0);
8914           }
8915
8916           goto unknown;
8917
8918         case 'i':
8919           switch (name[1])
8920           {
8921             case 'n':
8922               if (name[2] == 'd' &&
8923                   name[3] == 'e' &&
8924                   name[4] == 'x')
8925               {                                   /* index      */
8926                 return -KEY_index;
8927               }
8928
8929               goto unknown;
8930
8931             case 'o':
8932               if (name[2] == 'c' &&
8933                   name[3] == 't' &&
8934                   name[4] == 'l')
8935               {                                   /* ioctl      */
8936                 return -KEY_ioctl;
8937               }
8938
8939               goto unknown;
8940
8941             default:
8942               goto unknown;
8943           }
8944
8945         case 'l':
8946           switch (name[1])
8947           {
8948             case 'o':
8949               if (name[2] == 'c' &&
8950                   name[3] == 'a' &&
8951                   name[4] == 'l')
8952               {                                   /* local      */
8953                 return KEY_local;
8954               }
8955
8956               goto unknown;
8957
8958             case 's':
8959               if (name[2] == 't' &&
8960                   name[3] == 'a' &&
8961                   name[4] == 't')
8962               {                                   /* lstat      */
8963                 return -KEY_lstat;
8964               }
8965
8966               goto unknown;
8967
8968             default:
8969               goto unknown;
8970           }
8971
8972         case 'm':
8973           if (name[1] == 'k' &&
8974               name[2] == 'd' &&
8975               name[3] == 'i' &&
8976               name[4] == 'r')
8977           {                                       /* mkdir      */
8978             return -KEY_mkdir;
8979           }
8980
8981           goto unknown;
8982
8983         case 'p':
8984           if (name[1] == 'r' &&
8985               name[2] == 'i' &&
8986               name[3] == 'n' &&
8987               name[4] == 't')
8988           {                                       /* print      */
8989             return KEY_print;
8990           }
8991
8992           goto unknown;
8993
8994         case 'r':
8995           switch (name[1])
8996           {
8997             case 'e':
8998               if (name[2] == 's' &&
8999                   name[3] == 'e' &&
9000                   name[4] == 't')
9001               {                                   /* reset      */
9002                 return -KEY_reset;
9003               }
9004
9005               goto unknown;
9006
9007             case 'm':
9008               if (name[2] == 'd' &&
9009                   name[3] == 'i' &&
9010                   name[4] == 'r')
9011               {                                   /* rmdir      */
9012                 return -KEY_rmdir;
9013               }
9014
9015               goto unknown;
9016
9017             default:
9018               goto unknown;
9019           }
9020
9021         case 's':
9022           switch (name[1])
9023           {
9024             case 'e':
9025               if (name[2] == 'm' &&
9026                   name[3] == 'o' &&
9027                   name[4] == 'p')
9028               {                                   /* semop      */
9029                 return -KEY_semop;
9030               }
9031
9032               goto unknown;
9033
9034             case 'h':
9035               if (name[2] == 'i' &&
9036                   name[3] == 'f' &&
9037                   name[4] == 't')
9038               {                                   /* shift      */
9039                 return -KEY_shift;
9040               }
9041
9042               goto unknown;
9043
9044             case 'l':
9045               if (name[2] == 'e' &&
9046                   name[3] == 'e' &&
9047                   name[4] == 'p')
9048               {                                   /* sleep      */
9049                 return -KEY_sleep;
9050               }
9051
9052               goto unknown;
9053
9054             case 'p':
9055               if (name[2] == 'l' &&
9056                   name[3] == 'i' &&
9057                   name[4] == 't')
9058               {                                   /* split      */
9059                 return KEY_split;
9060               }
9061
9062               goto unknown;
9063
9064             case 'r':
9065               if (name[2] == 'a' &&
9066                   name[3] == 'n' &&
9067                   name[4] == 'd')
9068               {                                   /* srand      */
9069                 return -KEY_srand;
9070               }
9071
9072               goto unknown;
9073
9074             case 't':
9075               switch (name[2])
9076               {
9077                 case 'a':
9078                   if (name[3] == 't' &&
9079                       name[4] == 'e')
9080                   {                               /* state      */
9081                     return (all_keywords || FEATURE_IS_ENABLED("state") ? KEY_state : 0);
9082                   }
9083
9084                   goto unknown;
9085
9086                 case 'u':
9087                   if (name[3] == 'd' &&
9088                       name[4] == 'y')
9089                   {                               /* study      */
9090                     return KEY_study;
9091                   }
9092
9093                   goto unknown;
9094
9095                 default:
9096                   goto unknown;
9097               }
9098
9099             default:
9100               goto unknown;
9101           }
9102
9103         case 't':
9104           if (name[1] == 'i' &&
9105               name[2] == 'm' &&
9106               name[3] == 'e' &&
9107               name[4] == 's')
9108           {                                       /* times      */
9109             return -KEY_times;
9110           }
9111
9112           goto unknown;
9113
9114         case 'u':
9115           switch (name[1])
9116           {
9117             case 'm':
9118               if (name[2] == 'a' &&
9119                   name[3] == 's' &&
9120                   name[4] == 'k')
9121               {                                   /* umask      */
9122                 return -KEY_umask;
9123               }
9124
9125               goto unknown;
9126
9127             case 'n':
9128               switch (name[2])
9129               {
9130                 case 'd':
9131                   if (name[3] == 'e' &&
9132                       name[4] == 'f')
9133                   {                               /* undef      */
9134                     return KEY_undef;
9135                   }
9136
9137                   goto unknown;
9138
9139                 case 't':
9140                   if (name[3] == 'i')
9141                   {
9142                     switch (name[4])
9143                     {
9144                       case 'e':
9145                         {                         /* untie      */
9146                           return KEY_untie;
9147                         }
9148
9149                       case 'l':
9150                         {                         /* until      */
9151                           return KEY_until;
9152                         }
9153
9154                       default:
9155                         goto unknown;
9156                     }
9157                   }
9158
9159                   goto unknown;
9160
9161                 default:
9162                   goto unknown;
9163               }
9164
9165             case 't':
9166               if (name[2] == 'i' &&
9167                   name[3] == 'm' &&
9168                   name[4] == 'e')
9169               {                                   /* utime      */
9170                 return -KEY_utime;
9171               }
9172
9173               goto unknown;
9174
9175             default:
9176               goto unknown;
9177           }
9178
9179         case 'w':
9180           switch (name[1])
9181           {
9182             case 'h':
9183               if (name[2] == 'i' &&
9184                   name[3] == 'l' &&
9185                   name[4] == 'e')
9186               {                                   /* while      */
9187                 return KEY_while;
9188               }
9189
9190               goto unknown;
9191
9192             case 'r':
9193               if (name[2] == 'i' &&
9194                   name[3] == 't' &&
9195                   name[4] == 'e')
9196               {                                   /* write      */
9197                 return -KEY_write;
9198               }
9199
9200               goto unknown;
9201
9202             default:
9203               goto unknown;
9204           }
9205
9206         default:
9207           goto unknown;
9208       }
9209
9210     case 6: /* 33 tokens of length 6 */
9211       switch (name[0])
9212       {
9213         case 'a':
9214           if (name[1] == 'c' &&
9215               name[2] == 'c' &&
9216               name[3] == 'e' &&
9217               name[4] == 'p' &&
9218               name[5] == 't')
9219           {                                       /* accept     */
9220             return -KEY_accept;
9221           }
9222
9223           goto unknown;
9224
9225         case 'c':
9226           switch (name[1])
9227           {
9228             case 'a':
9229               if (name[2] == 'l' &&
9230                   name[3] == 'l' &&
9231                   name[4] == 'e' &&
9232                   name[5] == 'r')
9233               {                                   /* caller     */
9234                 return -KEY_caller;
9235               }
9236
9237               goto unknown;
9238
9239             case 'h':
9240               if (name[2] == 'r' &&
9241                   name[3] == 'o' &&
9242                   name[4] == 'o' &&
9243                   name[5] == 't')
9244               {                                   /* chroot     */
9245                 return -KEY_chroot;
9246               }
9247
9248               goto unknown;
9249
9250             default:
9251               goto unknown;
9252           }
9253
9254         case 'd':
9255           if (name[1] == 'e' &&
9256               name[2] == 'l' &&
9257               name[3] == 'e' &&
9258               name[4] == 't' &&
9259               name[5] == 'e')
9260           {                                       /* delete     */
9261             return KEY_delete;
9262           }
9263
9264           goto unknown;
9265
9266         case 'e':
9267           switch (name[1])
9268           {
9269             case 'l':
9270               if (name[2] == 's' &&
9271                   name[3] == 'e' &&
9272                   name[4] == 'i' &&
9273                   name[5] == 'f')
9274               {                                   /* elseif     */
9275                   Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif");
9276               }
9277
9278               goto unknown;
9279
9280             case 'x':
9281               if (name[2] == 'i' &&
9282                   name[3] == 's' &&
9283                   name[4] == 't' &&
9284                   name[5] == 's')
9285               {                                   /* exists     */
9286                 return KEY_exists;
9287               }
9288
9289               goto unknown;
9290
9291             default:
9292               goto unknown;
9293           }
9294
9295         case 'f':
9296           switch (name[1])
9297           {
9298             case 'i':
9299               if (name[2] == 'l' &&
9300                   name[3] == 'e' &&
9301                   name[4] == 'n' &&
9302                   name[5] == 'o')
9303               {                                   /* fileno     */
9304                 return -KEY_fileno;
9305               }
9306
9307               goto unknown;
9308
9309             case 'o':
9310               if (name[2] == 'r' &&
9311                   name[3] == 'm' &&
9312                   name[4] == 'a' &&
9313                   name[5] == 't')
9314               {                                   /* format     */
9315                 return KEY_format;
9316               }
9317
9318               goto unknown;
9319
9320             default:
9321               goto unknown;
9322           }
9323
9324         case 'g':
9325           if (name[1] == 'm' &&
9326               name[2] == 't' &&
9327               name[3] == 'i' &&
9328               name[4] == 'm' &&
9329               name[5] == 'e')
9330           {                                       /* gmtime     */
9331             return -KEY_gmtime;
9332           }
9333
9334           goto unknown;
9335
9336         case 'l':
9337           switch (name[1])
9338           {
9339             case 'e':
9340               if (name[2] == 'n' &&
9341                   name[3] == 'g' &&
9342                   name[4] == 't' &&
9343                   name[5] == 'h')
9344               {                                   /* length     */
9345                 return -KEY_length;
9346               }
9347
9348               goto unknown;
9349
9350             case 'i':
9351               if (name[2] == 's' &&
9352                   name[3] == 't' &&
9353                   name[4] == 'e' &&
9354                   name[5] == 'n')
9355               {                                   /* listen     */
9356                 return -KEY_listen;
9357               }
9358
9359               goto unknown;
9360
9361             default:
9362               goto unknown;
9363           }
9364
9365         case 'm':
9366           if (name[1] == 's' &&
9367               name[2] == 'g')
9368           {
9369             switch (name[3])
9370             {
9371               case 'c':
9372                 if (name[4] == 't' &&
9373                     name[5] == 'l')
9374                 {                                 /* msgctl     */
9375                   return -KEY_msgctl;
9376                 }
9377
9378                 goto unknown;
9379
9380               case 'g':
9381                 if (name[4] == 'e' &&
9382                     name[5] == 't')
9383                 {                                 /* msgget     */
9384                   return -KEY_msgget;
9385                 }
9386
9387                 goto unknown;
9388
9389               case 'r':
9390                 if (name[4] == 'c' &&
9391                     name[5] == 'v')
9392                 {                                 /* msgrcv     */
9393                   return -KEY_msgrcv;
9394                 }
9395
9396                 goto unknown;
9397
9398               case 's':
9399                 if (name[4] == 'n' &&
9400                     name[5] == 'd')
9401                 {                                 /* msgsnd     */
9402                   return -KEY_msgsnd;
9403                 }
9404
9405                 goto unknown;
9406
9407               default:
9408                 goto unknown;
9409             }
9410           }
9411
9412           goto unknown;
9413
9414         case 'p':
9415           if (name[1] == 'r' &&
9416               name[2] == 'i' &&
9417               name[3] == 'n' &&
9418               name[4] == 't' &&
9419               name[5] == 'f')
9420           {                                       /* printf     */
9421             return KEY_printf;
9422           }
9423
9424           goto unknown;
9425
9426         case 'r':
9427           switch (name[1])
9428           {
9429             case 'e':
9430               switch (name[2])
9431               {
9432                 case 'n':
9433                   if (name[3] == 'a' &&
9434                       name[4] == 'm' &&
9435                       name[5] == 'e')
9436                   {                               /* rename     */
9437                     return -KEY_rename;
9438                   }
9439
9440                   goto unknown;
9441
9442                 case 't':
9443                   if (name[3] == 'u' &&
9444                       name[4] == 'r' &&
9445                       name[5] == 'n')
9446                   {                               /* return     */
9447                     return KEY_return;
9448                   }
9449
9450                   goto unknown;
9451
9452                 default:
9453                   goto unknown;
9454               }
9455
9456             case 'i':
9457               if (name[2] == 'n' &&
9458                   name[3] == 'd' &&
9459                   name[4] == 'e' &&
9460                   name[5] == 'x')
9461               {                                   /* rindex     */
9462                 return -KEY_rindex;
9463               }
9464
9465               goto unknown;
9466
9467             default:
9468               goto unknown;
9469           }
9470
9471         case 's':
9472           switch (name[1])
9473           {
9474             case 'c':
9475               if (name[2] == 'a' &&
9476                   name[3] == 'l' &&
9477                   name[4] == 'a' &&
9478                   name[5] == 'r')
9479               {                                   /* scalar     */
9480                 return KEY_scalar;
9481               }
9482
9483               goto unknown;
9484
9485             case 'e':
9486               switch (name[2])
9487               {
9488                 case 'l':
9489                   if (name[3] == 'e' &&
9490                       name[4] == 'c' &&
9491                       name[5] == 't')
9492                   {                               /* select     */
9493                     return -KEY_select;
9494                   }
9495
9496                   goto unknown;
9497
9498                 case 'm':
9499                   switch (name[3])
9500                   {
9501                     case 'c':
9502                       if (name[4] == 't' &&
9503                           name[5] == 'l')
9504                       {                           /* semctl     */
9505                         return -KEY_semctl;
9506                       }
9507
9508                       goto unknown;
9509
9510                     case 'g':
9511                       if (name[4] == 'e' &&
9512                           name[5] == 't')
9513                       {                           /* semget     */
9514                         return -KEY_semget;
9515                       }
9516
9517                       goto unknown;
9518
9519                     default:
9520                       goto unknown;
9521                   }
9522
9523                 default:
9524                   goto unknown;
9525               }
9526
9527             case 'h':
9528               if (name[2] == 'm')
9529               {
9530                 switch (name[3])
9531                 {
9532                   case 'c':
9533                     if (name[4] == 't' &&
9534                         name[5] == 'l')
9535                     {                             /* shmctl     */
9536                       return -KEY_shmctl;
9537                     }
9538
9539                     goto unknown;
9540
9541                   case 'g':
9542                     if (name[4] == 'e' &&
9543                         name[5] == 't')
9544                     {                             /* shmget     */
9545                       return -KEY_shmget;
9546                     }
9547
9548                     goto unknown;
9549
9550                   default:
9551                     goto unknown;
9552                 }
9553               }
9554
9555               goto unknown;
9556
9557             case 'o':
9558               if (name[2] == 'c' &&
9559                   name[3] == 'k' &&
9560                   name[4] == 'e' &&
9561                   name[5] == 't')
9562               {                                   /* socket     */
9563                 return -KEY_socket;
9564               }
9565
9566               goto unknown;
9567
9568             case 'p':
9569               if (name[2] == 'l' &&
9570                   name[3] == 'i' &&
9571                   name[4] == 'c' &&
9572                   name[5] == 'e')
9573               {                                   /* splice     */
9574                 return -KEY_splice;
9575               }
9576
9577               goto unknown;
9578
9579             case 'u':
9580               if (name[2] == 'b' &&
9581                   name[3] == 's' &&
9582                   name[4] == 't' &&
9583                   name[5] == 'r')
9584               {                                   /* substr     */
9585                 return -KEY_substr;
9586               }
9587
9588               goto unknown;
9589
9590             case 'y':
9591               if (name[2] == 's' &&
9592                   name[3] == 't' &&
9593                   name[4] == 'e' &&
9594                   name[5] == 'm')
9595               {                                   /* system     */
9596                 return -KEY_system;
9597               }
9598
9599               goto unknown;
9600
9601             default:
9602               goto unknown;
9603           }
9604
9605         case 'u':
9606           if (name[1] == 'n')
9607           {
9608             switch (name[2])
9609             {
9610               case 'l':
9611                 switch (name[3])
9612                 {
9613                   case 'e':
9614                     if (name[4] == 's' &&
9615                         name[5] == 's')
9616                     {                             /* unless     */
9617                       return KEY_unless;
9618                     }
9619
9620                     goto unknown;
9621
9622                   case 'i':
9623                     if (name[4] == 'n' &&
9624                         name[5] == 'k')
9625                     {                             /* unlink     */
9626                       return -KEY_unlink;
9627                     }
9628
9629                     goto unknown;
9630
9631                   default:
9632                     goto unknown;
9633                 }
9634
9635               case 'p':
9636                 if (name[3] == 'a' &&
9637                     name[4] == 'c' &&
9638                     name[5] == 'k')
9639                 {                                 /* unpack     */
9640                   return -KEY_unpack;
9641                 }
9642
9643                 goto unknown;
9644
9645               default:
9646                 goto unknown;
9647             }
9648           }
9649
9650           goto unknown;
9651
9652         case 'v':
9653           if (name[1] == 'a' &&
9654               name[2] == 'l' &&
9655               name[3] == 'u' &&
9656               name[4] == 'e' &&
9657               name[5] == 's')
9658           {                                       /* values     */
9659             return -KEY_values;
9660           }
9661
9662           goto unknown;
9663
9664         default:
9665           goto unknown;
9666       }
9667
9668     case 7: /* 29 tokens of length 7 */
9669       switch (name[0])
9670       {
9671         case 'D':
9672           if (name[1] == 'E' &&
9673               name[2] == 'S' &&
9674               name[3] == 'T' &&
9675               name[4] == 'R' &&
9676               name[5] == 'O' &&
9677               name[6] == 'Y')
9678           {                                       /* DESTROY    */
9679             return KEY_DESTROY;
9680           }
9681
9682           goto unknown;
9683
9684         case '_':
9685           if (name[1] == '_' &&
9686               name[2] == 'E' &&
9687               name[3] == 'N' &&
9688               name[4] == 'D' &&
9689               name[5] == '_' &&
9690               name[6] == '_')
9691           {                                       /* __END__    */
9692             return KEY___END__;
9693           }
9694
9695           goto unknown;
9696
9697         case 'b':
9698           if (name[1] == 'i' &&
9699               name[2] == 'n' &&
9700               name[3] == 'm' &&
9701               name[4] == 'o' &&
9702               name[5] == 'd' &&
9703               name[6] == 'e')
9704           {                                       /* binmode    */
9705             return -KEY_binmode;
9706           }
9707
9708           goto unknown;
9709
9710         case 'c':
9711           if (name[1] == 'o' &&
9712               name[2] == 'n' &&
9713               name[3] == 'n' &&
9714               name[4] == 'e' &&
9715               name[5] == 'c' &&
9716               name[6] == 't')
9717           {                                       /* connect    */
9718             return -KEY_connect;
9719           }
9720
9721           goto unknown;
9722
9723         case 'd':
9724           switch (name[1])
9725           {
9726             case 'b':
9727               if (name[2] == 'm' &&
9728                   name[3] == 'o' &&
9729                   name[4] == 'p' &&
9730                   name[5] == 'e' &&
9731                   name[6] == 'n')
9732               {                                   /* dbmopen    */
9733                 return -KEY_dbmopen;
9734               }
9735
9736               goto unknown;
9737
9738             case 'e':
9739               if (name[2] == 'f')
9740               {
9741                 switch (name[3])
9742                 {
9743                   case 'a':
9744                     if (name[4] == 'u' &&
9745                         name[5] == 'l' &&
9746                         name[6] == 't')
9747                     {                             /* default    */
9748                       return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_default : 0);
9749                     }
9750
9751                     goto unknown;
9752
9753                   case 'i':
9754                     if (name[4] == 'n' &&
9755                         name[5] == 'e' &&
9756                         name[6] == 'd')
9757                     {                             /* defined    */
9758                       return KEY_defined;
9759                     }
9760
9761                     goto unknown;
9762
9763                   default:
9764                     goto unknown;
9765                 }
9766               }
9767
9768               goto unknown;
9769
9770             default:
9771               goto unknown;
9772           }
9773
9774         case 'f':
9775           if (name[1] == 'o' &&
9776               name[2] == 'r' &&
9777               name[3] == 'e' &&
9778               name[4] == 'a' &&
9779               name[5] == 'c' &&
9780               name[6] == 'h')
9781           {                                       /* foreach    */
9782             return KEY_foreach;
9783           }
9784
9785           goto unknown;
9786
9787         case 'g':
9788           if (name[1] == 'e' &&
9789               name[2] == 't' &&
9790               name[3] == 'p')
9791           {
9792             switch (name[4])
9793             {
9794               case 'g':
9795                 if (name[5] == 'r' &&
9796                     name[6] == 'p')
9797                 {                                 /* getpgrp    */
9798                   return -KEY_getpgrp;
9799                 }
9800
9801                 goto unknown;
9802
9803               case 'p':
9804                 if (name[5] == 'i' &&
9805                     name[6] == 'd')
9806                 {                                 /* getppid    */
9807                   return -KEY_getppid;
9808                 }
9809
9810                 goto unknown;
9811
9812               default:
9813                 goto unknown;
9814             }
9815           }
9816
9817           goto unknown;
9818
9819         case 'l':
9820           if (name[1] == 'c' &&
9821               name[2] == 'f' &&
9822               name[3] == 'i' &&
9823               name[4] == 'r' &&
9824               name[5] == 's' &&
9825               name[6] == 't')
9826           {                                       /* lcfirst    */
9827             return -KEY_lcfirst;
9828           }
9829
9830           goto unknown;
9831
9832         case 'o':
9833           if (name[1] == 'p' &&
9834               name[2] == 'e' &&
9835               name[3] == 'n' &&
9836               name[4] == 'd' &&
9837               name[5] == 'i' &&
9838               name[6] == 'r')
9839           {                                       /* opendir    */
9840             return -KEY_opendir;
9841           }
9842
9843           goto unknown;
9844
9845         case 'p':
9846           if (name[1] == 'a' &&
9847               name[2] == 'c' &&
9848               name[3] == 'k' &&
9849               name[4] == 'a' &&
9850               name[5] == 'g' &&
9851               name[6] == 'e')
9852           {                                       /* package    */
9853             return KEY_package;
9854           }
9855
9856           goto unknown;
9857
9858         case 'r':
9859           if (name[1] == 'e')
9860           {
9861             switch (name[2])
9862             {
9863               case 'a':
9864                 if (name[3] == 'd' &&
9865                     name[4] == 'd' &&
9866                     name[5] == 'i' &&
9867                     name[6] == 'r')
9868                 {                                 /* readdir    */
9869                   return -KEY_readdir;
9870                 }
9871
9872                 goto unknown;
9873
9874               case 'q':
9875                 if (name[3] == 'u' &&
9876                     name[4] == 'i' &&
9877                     name[5] == 'r' &&
9878                     name[6] == 'e')
9879                 {                                 /* require    */
9880                   return KEY_require;
9881                 }
9882
9883                 goto unknown;
9884
9885               case 'v':
9886                 if (name[3] == 'e' &&
9887                     name[4] == 'r' &&
9888                     name[5] == 's' &&
9889                     name[6] == 'e')
9890                 {                                 /* reverse    */
9891                   return -KEY_reverse;
9892                 }
9893
9894                 goto unknown;
9895
9896               default:
9897                 goto unknown;
9898             }
9899           }
9900
9901           goto unknown;
9902
9903         case 's':
9904           switch (name[1])
9905           {
9906             case 'e':
9907               switch (name[2])
9908               {
9909                 case 'e':
9910                   if (name[3] == 'k' &&
9911                       name[4] == 'd' &&
9912                       name[5] == 'i' &&
9913                       name[6] == 'r')
9914                   {                               /* seekdir    */
9915                     return -KEY_seekdir;
9916                   }
9917
9918                   goto unknown;
9919
9920                 case 't':
9921                   if (name[3] == 'p' &&
9922                       name[4] == 'g' &&
9923                       name[5] == 'r' &&
9924                       name[6] == 'p')
9925                   {                               /* setpgrp    */
9926                     return -KEY_setpgrp;
9927                   }
9928
9929                   goto unknown;
9930
9931                 default:
9932                   goto unknown;
9933               }
9934
9935             case 'h':
9936               if (name[2] == 'm' &&
9937                   name[3] == 'r' &&
9938                   name[4] == 'e' &&
9939                   name[5] == 'a' &&
9940                   name[6] == 'd')
9941               {                                   /* shmread    */
9942                 return -KEY_shmread;
9943               }
9944
9945               goto unknown;
9946
9947             case 'p':
9948               if (name[2] == 'r' &&
9949                   name[3] == 'i' &&
9950                   name[4] == 'n' &&
9951                   name[5] == 't' &&
9952                   name[6] == 'f')
9953               {                                   /* sprintf    */
9954                 return -KEY_sprintf;
9955               }
9956
9957               goto unknown;
9958
9959             case 'y':
9960               switch (name[2])
9961               {
9962                 case 'm':
9963                   if (name[3] == 'l' &&
9964                       name[4] == 'i' &&
9965                       name[5] == 'n' &&
9966                       name[6] == 'k')
9967                   {                               /* symlink    */
9968                     return -KEY_symlink;
9969                   }
9970
9971                   goto unknown;
9972
9973                 case 's':
9974                   switch (name[3])
9975                   {
9976                     case 'c':
9977                       if (name[4] == 'a' &&
9978                           name[5] == 'l' &&
9979                           name[6] == 'l')
9980                       {                           /* syscall    */
9981                         return -KEY_syscall;
9982                       }
9983
9984                       goto unknown;
9985
9986                     case 'o':
9987                       if (name[4] == 'p' &&
9988                           name[5] == 'e' &&
9989                           name[6] == 'n')
9990                       {                           /* sysopen    */
9991                         return -KEY_sysopen;
9992                       }
9993
9994                       goto unknown;
9995
9996                     case 'r':
9997                       if (name[4] == 'e' &&
9998                           name[5] == 'a' &&
9999                           name[6] == 'd')
10000                       {                           /* sysread    */
10001                         return -KEY_sysread;
10002                       }
10003
10004                       goto unknown;
10005
10006                     case 's':
10007                       if (name[4] == 'e' &&
10008                           name[5] == 'e' &&
10009                           name[6] == 'k')
10010                       {                           /* sysseek    */
10011                         return -KEY_sysseek;
10012                       }
10013
10014                       goto unknown;
10015
10016                     default:
10017                       goto unknown;
10018                   }
10019
10020                 default:
10021                   goto unknown;
10022               }
10023
10024             default:
10025               goto unknown;
10026           }
10027
10028         case 't':
10029           if (name[1] == 'e' &&
10030               name[2] == 'l' &&
10031               name[3] == 'l' &&
10032               name[4] == 'd' &&
10033               name[5] == 'i' &&
10034               name[6] == 'r')
10035           {                                       /* telldir    */
10036             return -KEY_telldir;
10037           }
10038
10039           goto unknown;
10040
10041         case 'u':
10042           switch (name[1])
10043           {
10044             case 'c':
10045               if (name[2] == 'f' &&
10046                   name[3] == 'i' &&
10047                   name[4] == 'r' &&
10048                   name[5] == 's' &&
10049                   name[6] == 't')
10050               {                                   /* ucfirst    */
10051                 return -KEY_ucfirst;
10052               }
10053
10054               goto unknown;
10055
10056             case 'n':
10057               if (name[2] == 's' &&
10058                   name[3] == 'h' &&
10059                   name[4] == 'i' &&
10060                   name[5] == 'f' &&
10061                   name[6] == 't')
10062               {                                   /* unshift    */
10063                 return -KEY_unshift;
10064               }
10065
10066               goto unknown;
10067
10068             default:
10069               goto unknown;
10070           }
10071
10072         case 'w':
10073           if (name[1] == 'a' &&
10074               name[2] == 'i' &&
10075               name[3] == 't' &&
10076               name[4] == 'p' &&
10077               name[5] == 'i' &&
10078               name[6] == 'd')
10079           {                                       /* waitpid    */
10080             return -KEY_waitpid;
10081           }
10082
10083           goto unknown;
10084
10085         default:
10086           goto unknown;
10087       }
10088
10089     case 8: /* 26 tokens of length 8 */
10090       switch (name[0])
10091       {
10092         case 'A':
10093           if (name[1] == 'U' &&
10094               name[2] == 'T' &&
10095               name[3] == 'O' &&
10096               name[4] == 'L' &&
10097               name[5] == 'O' &&
10098               name[6] == 'A' &&
10099               name[7] == 'D')
10100           {                                       /* AUTOLOAD   */
10101             return KEY_AUTOLOAD;
10102           }
10103
10104           goto unknown;
10105
10106         case '_':
10107           if (name[1] == '_')
10108           {
10109             switch (name[2])
10110             {
10111               case 'D':
10112                 if (name[3] == 'A' &&
10113                     name[4] == 'T' &&
10114                     name[5] == 'A' &&
10115                     name[6] == '_' &&
10116                     name[7] == '_')
10117                 {                                 /* __DATA__   */
10118                   return KEY___DATA__;
10119                 }
10120
10121                 goto unknown;
10122
10123               case 'F':
10124                 if (name[3] == 'I' &&
10125                     name[4] == 'L' &&
10126                     name[5] == 'E' &&
10127                     name[6] == '_' &&
10128                     name[7] == '_')
10129                 {                                 /* __FILE__   */
10130                   return -KEY___FILE__;
10131                 }
10132
10133                 goto unknown;
10134
10135               case 'L':
10136                 if (name[3] == 'I' &&
10137                     name[4] == 'N' &&
10138                     name[5] == 'E' &&
10139                     name[6] == '_' &&
10140                     name[7] == '_')
10141                 {                                 /* __LINE__   */
10142                   return -KEY___LINE__;
10143                 }
10144
10145                 goto unknown;
10146
10147               default:
10148                 goto unknown;
10149             }
10150           }
10151
10152           goto unknown;
10153
10154         case 'c':
10155           switch (name[1])
10156           {
10157             case 'l':
10158               if (name[2] == 'o' &&
10159                   name[3] == 's' &&
10160                   name[4] == 'e' &&
10161                   name[5] == 'd' &&
10162                   name[6] == 'i' &&
10163                   name[7] == 'r')
10164               {                                   /* closedir   */
10165                 return -KEY_closedir;
10166               }
10167
10168               goto unknown;
10169
10170             case 'o':
10171               if (name[2] == 'n' &&
10172                   name[3] == 't' &&
10173                   name[4] == 'i' &&
10174                   name[5] == 'n' &&
10175                   name[6] == 'u' &&
10176                   name[7] == 'e')
10177               {                                   /* continue   */
10178                 return -KEY_continue;
10179               }
10180
10181               goto unknown;
10182
10183             default:
10184               goto unknown;
10185           }
10186
10187         case 'd':
10188           if (name[1] == 'b' &&
10189               name[2] == 'm' &&
10190               name[3] == 'c' &&
10191               name[4] == 'l' &&
10192               name[5] == 'o' &&
10193               name[6] == 's' &&
10194               name[7] == 'e')
10195           {                                       /* dbmclose   */
10196             return -KEY_dbmclose;
10197           }
10198
10199           goto unknown;
10200
10201         case 'e':
10202           if (name[1] == 'n' &&
10203               name[2] == 'd')
10204           {
10205             switch (name[3])
10206             {
10207               case 'g':
10208                 if (name[4] == 'r' &&
10209                     name[5] == 'e' &&
10210                     name[6] == 'n' &&
10211                     name[7] == 't')
10212                 {                                 /* endgrent   */
10213                   return -KEY_endgrent;
10214                 }
10215
10216                 goto unknown;
10217
10218               case 'p':
10219                 if (name[4] == 'w' &&
10220                     name[5] == 'e' &&
10221                     name[6] == 'n' &&
10222                     name[7] == 't')
10223                 {                                 /* endpwent   */
10224                   return -KEY_endpwent;
10225                 }
10226
10227                 goto unknown;
10228
10229               default:
10230                 goto unknown;
10231             }
10232           }
10233
10234           goto unknown;
10235
10236         case 'f':
10237           if (name[1] == 'o' &&
10238               name[2] == 'r' &&
10239               name[3] == 'm' &&
10240               name[4] == 'l' &&
10241               name[5] == 'i' &&
10242               name[6] == 'n' &&
10243               name[7] == 'e')
10244           {                                       /* formline   */
10245             return -KEY_formline;
10246           }
10247
10248           goto unknown;
10249
10250         case 'g':
10251           if (name[1] == 'e' &&
10252               name[2] == 't')
10253           {
10254             switch (name[3])
10255             {
10256               case 'g':
10257                 if (name[4] == 'r')
10258                 {
10259                   switch (name[5])
10260                   {
10261                     case 'e':
10262                       if (name[6] == 'n' &&
10263                           name[7] == 't')
10264                       {                           /* getgrent   */
10265                         return -KEY_getgrent;
10266                       }
10267
10268                       goto unknown;
10269
10270                     case 'g':
10271                       if (name[6] == 'i' &&
10272                           name[7] == 'd')
10273                       {                           /* getgrgid   */
10274                         return -KEY_getgrgid;
10275                       }
10276
10277                       goto unknown;
10278
10279                     case 'n':
10280                       if (name[6] == 'a' &&
10281                           name[7] == 'm')
10282                       {                           /* getgrnam   */
10283                         return -KEY_getgrnam;
10284                       }
10285
10286                       goto unknown;
10287
10288                     default:
10289                       goto unknown;
10290                   }
10291                 }
10292
10293                 goto unknown;
10294
10295               case 'l':
10296                 if (name[4] == 'o' &&
10297                     name[5] == 'g' &&
10298                     name[6] == 'i' &&
10299                     name[7] == 'n')
10300                 {                                 /* getlogin   */
10301                   return -KEY_getlogin;
10302                 }
10303
10304                 goto unknown;
10305
10306               case 'p':
10307                 if (name[4] == 'w')
10308                 {
10309                   switch (name[5])
10310                   {
10311                     case 'e':
10312                       if (name[6] == 'n' &&
10313                           name[7] == 't')
10314                       {                           /* getpwent   */
10315                         return -KEY_getpwent;
10316                       }
10317
10318                       goto unknown;
10319
10320                     case 'n':
10321                       if (name[6] == 'a' &&
10322                           name[7] == 'm')
10323                       {                           /* getpwnam   */
10324                         return -KEY_getpwnam;
10325                       }
10326
10327                       goto unknown;
10328
10329                     case 'u':
10330                       if (name[6] == 'i' &&
10331                           name[7] == 'd')
10332                       {                           /* getpwuid   */
10333                         return -KEY_getpwuid;
10334                       }
10335
10336                       goto unknown;
10337
10338                     default:
10339                       goto unknown;
10340                   }
10341                 }
10342
10343                 goto unknown;
10344
10345               default:
10346                 goto unknown;
10347             }
10348           }
10349
10350           goto unknown;
10351
10352         case 'r':
10353           if (name[1] == 'e' &&
10354               name[2] == 'a' &&
10355               name[3] == 'd')
10356           {
10357             switch (name[4])
10358             {
10359               case 'l':
10360                 if (name[5] == 'i' &&
10361                     name[6] == 'n')
10362                 {
10363                   switch (name[7])
10364                   {
10365                     case 'e':
10366                       {                           /* readline   */
10367                         return -KEY_readline;
10368                       }
10369
10370                     case 'k':
10371                       {                           /* readlink   */
10372                         return -KEY_readlink;
10373                       }
10374
10375                     default:
10376                       goto unknown;
10377                   }
10378                 }
10379
10380                 goto unknown;
10381
10382               case 'p':
10383                 if (name[5] == 'i' &&
10384                     name[6] == 'p' &&
10385                     name[7] == 'e')
10386                 {                                 /* readpipe   */
10387                   return -KEY_readpipe;
10388                 }
10389
10390                 goto unknown;
10391
10392               default:
10393                 goto unknown;
10394             }
10395           }
10396
10397           goto unknown;
10398
10399         case 's':
10400           switch (name[1])
10401           {
10402             case 'e':
10403               if (name[2] == 't')
10404               {
10405                 switch (name[3])
10406                 {
10407                   case 'g':
10408                     if (name[4] == 'r' &&
10409                         name[5] == 'e' &&
10410                         name[6] == 'n' &&
10411                         name[7] == 't')
10412                     {                             /* setgrent   */
10413                       return -KEY_setgrent;
10414                     }
10415
10416                     goto unknown;
10417
10418                   case 'p':
10419                     if (name[4] == 'w' &&
10420                         name[5] == 'e' &&
10421                         name[6] == 'n' &&
10422                         name[7] == 't')
10423                     {                             /* setpwent   */
10424                       return -KEY_setpwent;
10425                     }
10426
10427                     goto unknown;
10428
10429                   default:
10430                     goto unknown;
10431                 }
10432               }
10433
10434               goto unknown;
10435
10436             case 'h':
10437               switch (name[2])
10438               {
10439                 case 'm':
10440                   if (name[3] == 'w' &&
10441                       name[4] == 'r' &&
10442                       name[5] == 'i' &&
10443                       name[6] == 't' &&
10444                       name[7] == 'e')
10445                   {                               /* shmwrite   */
10446                     return -KEY_shmwrite;
10447                   }
10448
10449                   goto unknown;
10450
10451                 case 'u':
10452                   if (name[3] == 't' &&
10453                       name[4] == 'd' &&
10454                       name[5] == 'o' &&
10455                       name[6] == 'w' &&
10456                       name[7] == 'n')
10457                   {                               /* shutdown   */
10458                     return -KEY_shutdown;
10459                   }
10460
10461                   goto unknown;
10462
10463                 default:
10464                   goto unknown;
10465               }
10466
10467             case 'y':
10468               if (name[2] == 's' &&
10469                   name[3] == 'w' &&
10470                   name[4] == 'r' &&
10471                   name[5] == 'i' &&
10472                   name[6] == 't' &&
10473                   name[7] == 'e')
10474               {                                   /* syswrite   */
10475                 return -KEY_syswrite;
10476               }
10477
10478               goto unknown;
10479
10480             default:
10481               goto unknown;
10482           }
10483
10484         case 't':
10485           if (name[1] == 'r' &&
10486               name[2] == 'u' &&
10487               name[3] == 'n' &&
10488               name[4] == 'c' &&
10489               name[5] == 'a' &&
10490               name[6] == 't' &&
10491               name[7] == 'e')
10492           {                                       /* truncate   */
10493             return -KEY_truncate;
10494           }
10495
10496           goto unknown;
10497
10498         default:
10499           goto unknown;
10500       }
10501
10502     case 9: /* 9 tokens of length 9 */
10503       switch (name[0])
10504       {
10505         case 'U':
10506           if (name[1] == 'N' &&
10507               name[2] == 'I' &&
10508               name[3] == 'T' &&
10509               name[4] == 'C' &&
10510               name[5] == 'H' &&
10511               name[6] == 'E' &&
10512               name[7] == 'C' &&
10513               name[8] == 'K')
10514           {                                       /* UNITCHECK  */
10515             return KEY_UNITCHECK;
10516           }
10517
10518           goto unknown;
10519
10520         case 'e':
10521           if (name[1] == 'n' &&
10522               name[2] == 'd' &&
10523               name[3] == 'n' &&
10524               name[4] == 'e' &&
10525               name[5] == 't' &&
10526               name[6] == 'e' &&
10527               name[7] == 'n' &&
10528               name[8] == 't')
10529           {                                       /* endnetent  */
10530             return -KEY_endnetent;
10531           }
10532
10533           goto unknown;
10534
10535         case 'g':
10536           if (name[1] == 'e' &&
10537               name[2] == 't' &&
10538               name[3] == 'n' &&
10539               name[4] == 'e' &&
10540               name[5] == 't' &&
10541               name[6] == 'e' &&
10542               name[7] == 'n' &&
10543               name[8] == 't')
10544           {                                       /* getnetent  */
10545             return -KEY_getnetent;
10546           }
10547
10548           goto unknown;
10549
10550         case 'l':
10551           if (name[1] == 'o' &&
10552               name[2] == 'c' &&
10553               name[3] == 'a' &&
10554               name[4] == 'l' &&
10555               name[5] == 't' &&
10556               name[6] == 'i' &&
10557               name[7] == 'm' &&
10558               name[8] == 'e')
10559           {                                       /* localtime  */
10560             return -KEY_localtime;
10561           }
10562
10563           goto unknown;
10564
10565         case 'p':
10566           if (name[1] == 'r' &&
10567               name[2] == 'o' &&
10568               name[3] == 't' &&
10569               name[4] == 'o' &&
10570               name[5] == 't' &&
10571               name[6] == 'y' &&
10572               name[7] == 'p' &&
10573               name[8] == 'e')
10574           {                                       /* prototype  */
10575             return KEY_prototype;
10576           }
10577
10578           goto unknown;
10579
10580         case 'q':
10581           if (name[1] == 'u' &&
10582               name[2] == 'o' &&
10583               name[3] == 't' &&
10584               name[4] == 'e' &&
10585               name[5] == 'm' &&
10586               name[6] == 'e' &&
10587               name[7] == 't' &&
10588               name[8] == 'a')
10589           {                                       /* quotemeta  */
10590             return -KEY_quotemeta;
10591           }
10592
10593           goto unknown;
10594
10595         case 'r':
10596           if (name[1] == 'e' &&
10597               name[2] == 'w' &&
10598               name[3] == 'i' &&
10599               name[4] == 'n' &&
10600               name[5] == 'd' &&
10601               name[6] == 'd' &&
10602               name[7] == 'i' &&
10603               name[8] == 'r')
10604           {                                       /* rewinddir  */
10605             return -KEY_rewinddir;
10606           }
10607
10608           goto unknown;
10609
10610         case 's':
10611           if (name[1] == 'e' &&
10612               name[2] == 't' &&
10613               name[3] == 'n' &&
10614               name[4] == 'e' &&
10615               name[5] == 't' &&
10616               name[6] == 'e' &&
10617               name[7] == 'n' &&
10618               name[8] == 't')
10619           {                                       /* setnetent  */
10620             return -KEY_setnetent;
10621           }
10622
10623           goto unknown;
10624
10625         case 'w':
10626           if (name[1] == 'a' &&
10627               name[2] == 'n' &&
10628               name[3] == 't' &&
10629               name[4] == 'a' &&
10630               name[5] == 'r' &&
10631               name[6] == 'r' &&
10632               name[7] == 'a' &&
10633               name[8] == 'y')
10634           {                                       /* wantarray  */
10635             return -KEY_wantarray;
10636           }
10637
10638           goto unknown;
10639
10640         default:
10641           goto unknown;
10642       }
10643
10644     case 10: /* 9 tokens of length 10 */
10645       switch (name[0])
10646       {
10647         case 'e':
10648           if (name[1] == 'n' &&
10649               name[2] == 'd')
10650           {
10651             switch (name[3])
10652             {
10653               case 'h':
10654                 if (name[4] == 'o' &&
10655                     name[5] == 's' &&
10656                     name[6] == 't' &&
10657                     name[7] == 'e' &&
10658                     name[8] == 'n' &&
10659                     name[9] == 't')
10660                 {                                 /* endhostent */
10661                   return -KEY_endhostent;
10662                 }
10663
10664                 goto unknown;
10665
10666               case 's':
10667                 if (name[4] == 'e' &&
10668                     name[5] == 'r' &&
10669                     name[6] == 'v' &&
10670                     name[7] == 'e' &&
10671                     name[8] == 'n' &&
10672                     name[9] == 't')
10673                 {                                 /* endservent */
10674                   return -KEY_endservent;
10675                 }
10676
10677                 goto unknown;
10678
10679               default:
10680                 goto unknown;
10681             }
10682           }
10683
10684           goto unknown;
10685
10686         case 'g':
10687           if (name[1] == 'e' &&
10688               name[2] == 't')
10689           {
10690             switch (name[3])
10691             {
10692               case 'h':
10693                 if (name[4] == 'o' &&
10694                     name[5] == 's' &&
10695                     name[6] == 't' &&
10696                     name[7] == 'e' &&
10697                     name[8] == 'n' &&
10698                     name[9] == 't')
10699                 {                                 /* gethostent */
10700                   return -KEY_gethostent;
10701                 }
10702
10703                 goto unknown;
10704
10705               case 's':
10706                 switch (name[4])
10707                 {
10708                   case 'e':
10709                     if (name[5] == 'r' &&
10710                         name[6] == 'v' &&
10711                         name[7] == 'e' &&
10712                         name[8] == 'n' &&
10713                         name[9] == 't')
10714                     {                             /* getservent */
10715                       return -KEY_getservent;
10716                     }
10717
10718                     goto unknown;
10719
10720                   case 'o':
10721                     if (name[5] == 'c' &&
10722                         name[6] == 'k' &&
10723                         name[7] == 'o' &&
10724                         name[8] == 'p' &&
10725                         name[9] == 't')
10726                     {                             /* getsockopt */
10727                       return -KEY_getsockopt;
10728                     }
10729
10730                     goto unknown;
10731
10732                   default:
10733                     goto unknown;
10734                 }
10735
10736               default:
10737                 goto unknown;
10738             }
10739           }
10740
10741           goto unknown;
10742
10743         case 's':
10744           switch (name[1])
10745           {
10746             case 'e':
10747               if (name[2] == 't')
10748               {
10749                 switch (name[3])
10750                 {
10751                   case 'h':
10752                     if (name[4] == 'o' &&
10753                         name[5] == 's' &&
10754                         name[6] == 't' &&
10755                         name[7] == 'e' &&
10756                         name[8] == 'n' &&
10757                         name[9] == 't')
10758                     {                             /* sethostent */
10759                       return -KEY_sethostent;
10760                     }
10761
10762                     goto unknown;
10763
10764                   case 's':
10765                     switch (name[4])
10766                     {
10767                       case 'e':
10768                         if (name[5] == 'r' &&
10769                             name[6] == 'v' &&
10770                             name[7] == 'e' &&
10771                             name[8] == 'n' &&
10772                             name[9] == 't')
10773                         {                         /* setservent */
10774                           return -KEY_setservent;
10775                         }
10776
10777                         goto unknown;
10778
10779                       case 'o':
10780                         if (name[5] == 'c' &&
10781                             name[6] == 'k' &&
10782                             name[7] == 'o' &&
10783                             name[8] == 'p' &&
10784                             name[9] == 't')
10785                         {                         /* setsockopt */
10786                           return -KEY_setsockopt;
10787                         }
10788
10789                         goto unknown;
10790
10791                       default:
10792                         goto unknown;
10793                     }
10794
10795                   default:
10796                     goto unknown;
10797                 }
10798               }
10799
10800               goto unknown;
10801
10802             case 'o':
10803               if (name[2] == 'c' &&
10804                   name[3] == 'k' &&
10805                   name[4] == 'e' &&
10806                   name[5] == 't' &&
10807                   name[6] == 'p' &&
10808                   name[7] == 'a' &&
10809                   name[8] == 'i' &&
10810                   name[9] == 'r')
10811               {                                   /* socketpair */
10812                 return -KEY_socketpair;
10813               }
10814
10815               goto unknown;
10816
10817             default:
10818               goto unknown;
10819           }
10820
10821         default:
10822           goto unknown;
10823       }
10824
10825     case 11: /* 8 tokens of length 11 */
10826       switch (name[0])
10827       {
10828         case '_':
10829           if (name[1] == '_' &&
10830               name[2] == 'P' &&
10831               name[3] == 'A' &&
10832               name[4] == 'C' &&
10833               name[5] == 'K' &&
10834               name[6] == 'A' &&
10835               name[7] == 'G' &&
10836               name[8] == 'E' &&
10837               name[9] == '_' &&
10838               name[10] == '_')
10839           {                                       /* __PACKAGE__ */
10840             return -KEY___PACKAGE__;
10841           }
10842
10843           goto unknown;
10844
10845         case 'e':
10846           if (name[1] == 'n' &&
10847               name[2] == 'd' &&
10848               name[3] == 'p' &&
10849               name[4] == 'r' &&
10850               name[5] == 'o' &&
10851               name[6] == 't' &&
10852               name[7] == 'o' &&
10853               name[8] == 'e' &&
10854               name[9] == 'n' &&
10855               name[10] == 't')
10856           {                                       /* endprotoent */
10857             return -KEY_endprotoent;
10858           }
10859
10860           goto unknown;
10861
10862         case 'g':
10863           if (name[1] == 'e' &&
10864               name[2] == 't')
10865           {
10866             switch (name[3])
10867             {
10868               case 'p':
10869                 switch (name[4])
10870                 {
10871                   case 'e':
10872                     if (name[5] == 'e' &&
10873                         name[6] == 'r' &&
10874                         name[7] == 'n' &&
10875                         name[8] == 'a' &&
10876                         name[9] == 'm' &&
10877                         name[10] == 'e')
10878                     {                             /* getpeername */
10879                       return -KEY_getpeername;
10880                     }
10881
10882                     goto unknown;
10883
10884                   case 'r':
10885                     switch (name[5])
10886                     {
10887                       case 'i':
10888                         if (name[6] == 'o' &&
10889                             name[7] == 'r' &&
10890                             name[8] == 'i' &&
10891                             name[9] == 't' &&
10892                             name[10] == 'y')
10893                         {                         /* getpriority */
10894                           return -KEY_getpriority;
10895                         }
10896
10897                         goto unknown;
10898
10899                       case 'o':
10900                         if (name[6] == 't' &&
10901                             name[7] == 'o' &&
10902                             name[8] == 'e' &&
10903                             name[9] == 'n' &&
10904                             name[10] == 't')
10905                         {                         /* getprotoent */
10906                           return -KEY_getprotoent;
10907                         }
10908
10909                         goto unknown;
10910
10911                       default:
10912                         goto unknown;
10913                     }
10914
10915                   default:
10916                     goto unknown;
10917                 }
10918
10919               case 's':
10920                 if (name[4] == 'o' &&
10921                     name[5] == 'c' &&
10922                     name[6] == 'k' &&
10923                     name[7] == 'n' &&
10924                     name[8] == 'a' &&
10925                     name[9] == 'm' &&
10926                     name[10] == 'e')
10927                 {                                 /* getsockname */
10928                   return -KEY_getsockname;
10929                 }
10930
10931                 goto unknown;
10932
10933               default:
10934                 goto unknown;
10935             }
10936           }
10937
10938           goto unknown;
10939
10940         case 's':
10941           if (name[1] == 'e' &&
10942               name[2] == 't' &&
10943               name[3] == 'p' &&
10944               name[4] == 'r')
10945           {
10946             switch (name[5])
10947             {
10948               case 'i':
10949                 if (name[6] == 'o' &&
10950                     name[7] == 'r' &&
10951                     name[8] == 'i' &&
10952                     name[9] == 't' &&
10953                     name[10] == 'y')
10954                 {                                 /* setpriority */
10955                   return -KEY_setpriority;
10956                 }
10957
10958                 goto unknown;
10959
10960               case 'o':
10961                 if (name[6] == 't' &&
10962                     name[7] == 'o' &&
10963                     name[8] == 'e' &&
10964                     name[9] == 'n' &&
10965                     name[10] == 't')
10966                 {                                 /* setprotoent */
10967                   return -KEY_setprotoent;
10968                 }
10969
10970                 goto unknown;
10971
10972               default:
10973                 goto unknown;
10974             }
10975           }
10976
10977           goto unknown;
10978
10979         default:
10980           goto unknown;
10981       }
10982
10983     case 12: /* 2 tokens of length 12 */
10984       if (name[0] == 'g' &&
10985           name[1] == 'e' &&
10986           name[2] == 't' &&
10987           name[3] == 'n' &&
10988           name[4] == 'e' &&
10989           name[5] == 't' &&
10990           name[6] == 'b' &&
10991           name[7] == 'y')
10992       {
10993         switch (name[8])
10994         {
10995           case 'a':
10996             if (name[9] == 'd' &&
10997                 name[10] == 'd' &&
10998                 name[11] == 'r')
10999             {                                     /* getnetbyaddr */
11000               return -KEY_getnetbyaddr;
11001             }
11002
11003             goto unknown;
11004
11005           case 'n':
11006             if (name[9] == 'a' &&
11007                 name[10] == 'm' &&
11008                 name[11] == 'e')
11009             {                                     /* getnetbyname */
11010               return -KEY_getnetbyname;
11011             }
11012
11013             goto unknown;
11014
11015           default:
11016             goto unknown;
11017         }
11018       }
11019
11020       goto unknown;
11021
11022     case 13: /* 4 tokens of length 13 */
11023       if (name[0] == 'g' &&
11024           name[1] == 'e' &&
11025           name[2] == 't')
11026       {
11027         switch (name[3])
11028         {
11029           case 'h':
11030             if (name[4] == 'o' &&
11031                 name[5] == 's' &&
11032                 name[6] == 't' &&
11033                 name[7] == 'b' &&
11034                 name[8] == 'y')
11035             {
11036               switch (name[9])
11037               {
11038                 case 'a':
11039                   if (name[10] == 'd' &&
11040                       name[11] == 'd' &&
11041                       name[12] == 'r')
11042                   {                               /* gethostbyaddr */
11043                     return -KEY_gethostbyaddr;
11044                   }
11045
11046                   goto unknown;
11047
11048                 case 'n':
11049                   if (name[10] == 'a' &&
11050                       name[11] == 'm' &&
11051                       name[12] == 'e')
11052                   {                               /* gethostbyname */
11053                     return -KEY_gethostbyname;
11054                   }
11055
11056                   goto unknown;
11057
11058                 default:
11059                   goto unknown;
11060               }
11061             }
11062
11063             goto unknown;
11064
11065           case 's':
11066             if (name[4] == 'e' &&
11067                 name[5] == 'r' &&
11068                 name[6] == 'v' &&
11069                 name[7] == 'b' &&
11070                 name[8] == 'y')
11071             {
11072               switch (name[9])
11073               {
11074                 case 'n':
11075                   if (name[10] == 'a' &&
11076                       name[11] == 'm' &&
11077                       name[12] == 'e')
11078                   {                               /* getservbyname */
11079                     return -KEY_getservbyname;
11080                   }
11081
11082                   goto unknown;
11083
11084                 case 'p':
11085                   if (name[10] == 'o' &&
11086                       name[11] == 'r' &&
11087                       name[12] == 't')
11088                   {                               /* getservbyport */
11089                     return -KEY_getservbyport;
11090                   }
11091
11092                   goto unknown;
11093
11094                 default:
11095                   goto unknown;
11096               }
11097             }
11098
11099             goto unknown;
11100
11101           default:
11102             goto unknown;
11103         }
11104       }
11105
11106       goto unknown;
11107
11108     case 14: /* 1 tokens of length 14 */
11109       if (name[0] == 'g' &&
11110           name[1] == 'e' &&
11111           name[2] == 't' &&
11112           name[3] == 'p' &&
11113           name[4] == 'r' &&
11114           name[5] == 'o' &&
11115           name[6] == 't' &&
11116           name[7] == 'o' &&
11117           name[8] == 'b' &&
11118           name[9] == 'y' &&
11119           name[10] == 'n' &&
11120           name[11] == 'a' &&
11121           name[12] == 'm' &&
11122           name[13] == 'e')
11123       {                                           /* getprotobyname */
11124         return -KEY_getprotobyname;
11125       }
11126
11127       goto unknown;
11128
11129     case 16: /* 1 tokens of length 16 */
11130       if (name[0] == 'g' &&
11131           name[1] == 'e' &&
11132           name[2] == 't' &&
11133           name[3] == 'p' &&
11134           name[4] == 'r' &&
11135           name[5] == 'o' &&
11136           name[6] == 't' &&
11137           name[7] == 'o' &&
11138           name[8] == 'b' &&
11139           name[9] == 'y' &&
11140           name[10] == 'n' &&
11141           name[11] == 'u' &&
11142           name[12] == 'm' &&
11143           name[13] == 'b' &&
11144           name[14] == 'e' &&
11145           name[15] == 'r')
11146       {                                           /* getprotobynumber */
11147         return -KEY_getprotobynumber;
11148       }
11149
11150       goto unknown;
11151
11152     default:
11153       goto unknown;
11154   }
11155
11156 unknown:
11157   return 0;
11158 }
11159
11160 STATIC void
11161 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
11162 {
11163     dVAR;
11164
11165     PERL_ARGS_ASSERT_CHECKCOMMA;
11166
11167     if (*s == ' ' && s[1] == '(') {     /* XXX gotta be a better way */
11168         if (ckWARN(WARN_SYNTAX)) {
11169             int level = 1;
11170             const char *w;
11171             for (w = s+2; *w && level; w++) {
11172                 if (*w == '(')
11173                     ++level;
11174                 else if (*w == ')')
11175                     --level;
11176             }
11177             while (isSPACE(*w))
11178                 ++w;
11179             /* the list of chars below is for end of statements or
11180              * block / parens, boolean operators (&&, ||, //) and branch
11181              * constructs (or, and, if, until, unless, while, err, for).
11182              * Not a very solid hack... */
11183             if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
11184                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
11185                             "%s (...) interpreted as function",name);
11186         }
11187     }
11188     while (s < PL_bufend && isSPACE(*s))
11189         s++;
11190     if (*s == '(')
11191         s++;
11192     while (s < PL_bufend && isSPACE(*s))
11193         s++;
11194     if (isIDFIRST_lazy_if(s,UTF)) {
11195         const char * const w = s++;
11196         while (isALNUM_lazy_if(s,UTF))
11197             s++;
11198         while (s < PL_bufend && isSPACE(*s))
11199             s++;
11200         if (*s == ',') {
11201             GV* gv;
11202             if (keyword(w, s - w, 0))
11203                 return;
11204
11205             gv = gv_fetchpvn_flags(w, s - w, 0, SVt_PVCV);
11206             if (gv && GvCVu(gv))
11207                 return;
11208             Perl_croak(aTHX_ "No comma allowed after %s", what);
11209         }
11210     }
11211 }
11212
11213 /* Either returns sv, or mortalizes sv and returns a new SV*.
11214    Best used as sv=new_constant(..., sv, ...).
11215    If s, pv are NULL, calls subroutine with one argument,
11216    and type is used with error messages only. */
11217
11218 STATIC SV *
11219 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
11220                SV *sv, SV *pv, const char *type, STRLEN typelen)
11221 {
11222     dVAR; dSP;
11223     HV * const table = GvHV(PL_hintgv);          /* ^H */
11224     SV *res;
11225     SV **cvp;
11226     SV *cv, *typesv;
11227     const char *why1 = "", *why2 = "", *why3 = "";
11228
11229     PERL_ARGS_ASSERT_NEW_CONSTANT;
11230
11231     if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
11232         SV *msg;
11233
11234         why2 = (const char *)
11235             (strEQ(key,"charnames")
11236              ? "(possibly a missing \"use charnames ...\")"
11237              : "");
11238         msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
11239                             (type ? type: "undef"), why2);
11240
11241         /* This is convoluted and evil ("goto considered harmful")
11242          * but I do not understand the intricacies of all the different
11243          * failure modes of %^H in here.  The goal here is to make
11244          * the most probable error message user-friendly. --jhi */
11245
11246         goto msgdone;
11247
11248     report:
11249         msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
11250                             (type ? type: "undef"), why1, why2, why3);
11251     msgdone:
11252         yyerror(SvPVX_const(msg));
11253         SvREFCNT_dec(msg);
11254         return sv;
11255     }
11256     cvp = hv_fetch(table, key, keylen, FALSE);
11257     if (!cvp || !SvOK(*cvp)) {
11258         why1 = "$^H{";
11259         why2 = key;
11260         why3 = "} is not defined";
11261         goto report;
11262     }
11263     sv_2mortal(sv);                     /* Parent created it permanently */
11264     cv = *cvp;
11265     if (!pv && s)
11266         pv = newSVpvn_flags(s, len, SVs_TEMP);
11267     if (type && pv)
11268         typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
11269     else
11270         typesv = &PL_sv_undef;
11271
11272     PUSHSTACKi(PERLSI_OVERLOAD);
11273     ENTER ;
11274     SAVETMPS;
11275
11276     PUSHMARK(SP) ;
11277     EXTEND(sp, 3);
11278     if (pv)
11279         PUSHs(pv);
11280     PUSHs(sv);
11281     if (pv)
11282         PUSHs(typesv);
11283     PUTBACK;
11284     call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
11285
11286     SPAGAIN ;
11287
11288     /* Check the eval first */
11289     if (!PL_in_eval && SvTRUE(ERRSV)) {
11290         sv_catpvs(ERRSV, "Propagated");
11291         yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
11292         (void)POPs;
11293         res = SvREFCNT_inc_simple(sv);
11294     }
11295     else {
11296         res = POPs;
11297         SvREFCNT_inc_simple_void(res);
11298     }
11299
11300     PUTBACK ;
11301     FREETMPS ;
11302     LEAVE ;
11303     POPSTACK;
11304
11305     if (!SvOK(res)) {
11306         why1 = "Call to &{$^H{";
11307         why2 = key;
11308         why3 = "}} did not return a defined value";
11309         sv = res;
11310         goto report;
11311     }
11312
11313     return res;
11314 }
11315
11316 /* Returns a NUL terminated string, with the length of the string written to
11317    *slp
11318    */
11319 STATIC char *
11320 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
11321 {
11322     dVAR;
11323     register char *d = dest;
11324     register char * const e = d + destlen - 3;  /* two-character token, ending NUL */
11325
11326     PERL_ARGS_ASSERT_SCAN_WORD;
11327
11328     for (;;) {
11329         if (d >= e)
11330             Perl_croak(aTHX_ ident_too_long);
11331         if (isALNUM(*s))        /* UTF handled below */
11332             *d++ = *s++;
11333         else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
11334             *d++ = ':';
11335             *d++ = ':';
11336             s++;
11337         }
11338         else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
11339             *d++ = *s++;
11340             *d++ = *s++;
11341         }
11342         else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
11343             char *t = s + UTF8SKIP(s);
11344             size_t len;
11345             while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
11346                 t += UTF8SKIP(t);
11347             len = t - s;
11348             if (d + len > e)
11349                 Perl_croak(aTHX_ ident_too_long);
11350             Copy(s, d, len, char);
11351             d += len;
11352             s = t;
11353         }
11354         else {
11355             *d = '\0';
11356             *slp = d - dest;
11357             return s;
11358         }
11359     }
11360 }
11361
11362 STATIC char *
11363 S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
11364 {
11365     dVAR;
11366     char *bracket = NULL;
11367     char funny = *s++;
11368     register char *d = dest;
11369     register char * const e = d + destlen + 3;    /* two-character token, ending NUL */
11370
11371     PERL_ARGS_ASSERT_SCAN_IDENT;
11372
11373     if (isSPACE(*s))
11374         s = PEEKSPACE(s);
11375     if (isDIGIT(*s)) {
11376         while (isDIGIT(*s)) {
11377             if (d >= e)
11378                 Perl_croak(aTHX_ ident_too_long);
11379             *d++ = *s++;
11380         }
11381     }
11382     else {
11383         for (;;) {
11384             if (d >= e)
11385                 Perl_croak(aTHX_ ident_too_long);
11386             if (isALNUM(*s))    /* UTF handled below */
11387                 *d++ = *s++;
11388             else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
11389                 *d++ = ':';
11390                 *d++ = ':';
11391                 s++;
11392             }
11393             else if (*s == ':' && s[1] == ':') {
11394                 *d++ = *s++;
11395                 *d++ = *s++;
11396             }
11397             else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
11398                 char *t = s + UTF8SKIP(s);
11399                 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
11400                     t += UTF8SKIP(t);
11401                 if (d + (t - s) > e)
11402                     Perl_croak(aTHX_ ident_too_long);
11403                 Copy(s, d, t - s, char);
11404                 d += t - s;
11405                 s = t;
11406             }
11407             else
11408                 break;
11409         }
11410     }
11411     *d = '\0';
11412     d = dest;
11413     if (*d) {
11414         if (PL_lex_state != LEX_NORMAL)
11415             PL_lex_state = LEX_INTERPENDMAYBE;
11416         return s;
11417     }
11418     if (*s == '$' && s[1] &&
11419         (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
11420     {
11421         return s;
11422     }
11423     if (*s == '{') {
11424         bracket = s;
11425         s++;
11426     }
11427     else if (ck_uni)
11428         check_uni();
11429     if (s < send)
11430         *d = *s++;
11431     d[1] = '\0';
11432     if (*d == '^' && *s && isCONTROLVAR(*s)) {
11433         *d = toCTRL(*s);
11434         s++;
11435     }
11436     if (bracket) {
11437         if (isSPACE(s[-1])) {
11438             while (s < send) {
11439                 const char ch = *s++;
11440                 if (!SPACE_OR_TAB(ch)) {
11441                     *d = ch;
11442                     break;
11443                 }
11444             }
11445         }
11446         if (isIDFIRST_lazy_if(d,UTF)) {
11447             d++;
11448             if (UTF) {
11449                 char *end = s;
11450                 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
11451                     end += UTF8SKIP(end);
11452                     while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
11453                         end += UTF8SKIP(end);
11454                 }
11455                 Copy(s, d, end - s, char);
11456                 d += end - s;
11457                 s = end;
11458             }
11459             else {
11460                 while ((isALNUM(*s) || *s == ':') && d < e)
11461                     *d++ = *s++;
11462                 if (d >= e)
11463                     Perl_croak(aTHX_ ident_too_long);
11464             }
11465             *d = '\0';
11466             while (s < send && SPACE_OR_TAB(*s))
11467                 s++;
11468             if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
11469                 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
11470                     const char * const brack =
11471                         (const char *)
11472                         ((*s == '[') ? "[...]" : "{...}");
11473                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
11474                         "Ambiguous use of %c{%s%s} resolved to %c%s%s",
11475                         funny, dest, brack, funny, dest, brack);
11476                 }
11477                 bracket++;
11478                 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
11479                 return s;
11480             }
11481         }
11482         /* Handle extended ${^Foo} variables
11483          * 1999-02-27 mjd-perl-patch@plover.com */
11484         else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
11485                  && isALNUM(*s))
11486         {
11487             d++;
11488             while (isALNUM(*s) && d < e) {
11489                 *d++ = *s++;
11490             }
11491             if (d >= e)
11492                 Perl_croak(aTHX_ ident_too_long);
11493             *d = '\0';
11494         }
11495         if (*s == '}') {
11496             s++;
11497             if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
11498                 PL_lex_state = LEX_INTERPEND;
11499                 PL_expect = XREF;
11500             }
11501             if (PL_lex_state == LEX_NORMAL) {
11502                 if (ckWARN(WARN_AMBIGUOUS) &&
11503                     (keyword(dest, d - dest, 0)
11504                      || get_cvn_flags(dest, d - dest, 0)))
11505                 {
11506                     if (funny == '#')
11507                         funny = '@';
11508                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
11509                         "Ambiguous use of %c{%s} resolved to %c%s",
11510                         funny, dest, funny, dest);
11511                 }
11512             }
11513         }
11514         else {
11515             s = bracket;                /* let the parser handle it */
11516             *dest = '\0';
11517         }
11518     }
11519     else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
11520         PL_lex_state = LEX_INTERPEND;
11521     return s;
11522 }
11523
11524 static U32
11525 S_pmflag(U32 pmfl, const char ch) {
11526     switch (ch) {
11527         CASE_STD_PMMOD_FLAGS_PARSE_SET(&pmfl);
11528     case GLOBAL_PAT_MOD:    pmfl |= PMf_GLOBAL; break;
11529     case CONTINUE_PAT_MOD:  pmfl |= PMf_CONTINUE; break;
11530     case ONCE_PAT_MOD:      pmfl |= PMf_KEEP; break;
11531     case KEEPCOPY_PAT_MOD:  pmfl |= PMf_KEEPCOPY; break;
11532     }
11533     return pmfl;
11534 }
11535
11536 void
11537 Perl_pmflag(pTHX_ U32* pmfl, int ch)
11538 {
11539     PERL_ARGS_ASSERT_PMFLAG;
11540
11541     Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED),
11542                      "Perl_pmflag() is deprecated, and will be removed from the XS API");
11543
11544     if (ch<256) {
11545         *pmfl = S_pmflag(*pmfl, (char)ch);
11546     }
11547 }
11548
11549 STATIC char *
11550 S_scan_pat(pTHX_ char *start, I32 type)
11551 {
11552     dVAR;
11553     PMOP *pm;
11554     char *s = scan_str(start,!!PL_madskills,FALSE);
11555     const char * const valid_flags =
11556         (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
11557 #ifdef PERL_MAD
11558     char *modstart;
11559 #endif
11560
11561     PERL_ARGS_ASSERT_SCAN_PAT;
11562
11563     if (!s) {
11564         const char * const delimiter = skipspace(start);
11565         Perl_croak(aTHX_
11566                    (const char *)
11567                    (*delimiter == '?'
11568                     ? "Search pattern not terminated or ternary operator parsed as search pattern"
11569                     : "Search pattern not terminated" ));
11570     }
11571
11572     pm = (PMOP*)newPMOP(type, 0);
11573     if (PL_multi_open == '?') {
11574         /* This is the only point in the code that sets PMf_ONCE:  */
11575         pm->op_pmflags |= PMf_ONCE;
11576
11577         /* Hence it's safe to do this bit of PMOP book-keeping here, which
11578            allows us to restrict the list needed by reset to just the ??
11579            matches.  */
11580         assert(type != OP_TRANS);
11581         if (PL_curstash) {
11582             MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
11583             U32 elements;
11584             if (!mg) {
11585                 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
11586                                  0);
11587             }
11588             elements = mg->mg_len / sizeof(PMOP**);
11589             Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
11590             ((PMOP**)mg->mg_ptr) [elements++] = pm;
11591             mg->mg_len = elements * sizeof(PMOP**);
11592             PmopSTASH_set(pm,PL_curstash);
11593         }
11594     }
11595 #ifdef PERL_MAD
11596     modstart = s;
11597 #endif
11598     while (*s && strchr(valid_flags, *s))
11599         pm->op_pmflags = S_pmflag(pm->op_pmflags, *s++);
11600 #ifdef PERL_MAD
11601     if (PL_madskills && modstart != s) {
11602         SV* tmptoken = newSVpvn(modstart, s - modstart);
11603         append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
11604     }
11605 #endif
11606     /* issue a warning if /c is specified,but /g is not */
11607     if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
11608     {
11609         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
11610                        "Use of /c modifier is meaningless without /g" );
11611     }
11612
11613     PL_lex_op = (OP*)pm;
11614     pl_yylval.ival = OP_MATCH;
11615     return s;
11616 }
11617
11618 STATIC char *
11619 S_scan_subst(pTHX_ char *start)
11620 {
11621     dVAR;
11622     register char *s;
11623     register PMOP *pm;
11624     I32 first_start;
11625     I32 es = 0;
11626 #ifdef PERL_MAD
11627     char *modstart;
11628 #endif
11629
11630     PERL_ARGS_ASSERT_SCAN_SUBST;
11631
11632     pl_yylval.ival = OP_NULL;
11633
11634     s = scan_str(start,!!PL_madskills,FALSE);
11635
11636     if (!s)
11637         Perl_croak(aTHX_ "Substitution pattern not terminated");
11638
11639     if (s[-1] == PL_multi_open)
11640         s--;
11641 #ifdef PERL_MAD
11642     if (PL_madskills) {
11643         CURMAD('q', PL_thisopen);
11644         CURMAD('_', PL_thiswhite);
11645         CURMAD('E', PL_thisstuff);
11646         CURMAD('Q', PL_thisclose);
11647         PL_realtokenstart = s - SvPVX(PL_linestr);
11648     }
11649 #endif
11650
11651     first_start = PL_multi_start;
11652     s = scan_str(s,!!PL_madskills,FALSE);
11653     if (!s) {
11654         if (PL_lex_stuff) {
11655             SvREFCNT_dec(PL_lex_stuff);
11656             PL_lex_stuff = NULL;
11657         }
11658         Perl_croak(aTHX_ "Substitution replacement not terminated");
11659     }
11660     PL_multi_start = first_start;       /* so whole substitution is taken together */
11661
11662     pm = (PMOP*)newPMOP(OP_SUBST, 0);
11663
11664 #ifdef PERL_MAD
11665     if (PL_madskills) {
11666         CURMAD('z', PL_thisopen);
11667         CURMAD('R', PL_thisstuff);
11668         CURMAD('Z', PL_thisclose);
11669     }
11670     modstart = s;
11671 #endif
11672
11673     while (*s) {
11674         if (*s == EXEC_PAT_MOD) {
11675             s++;
11676             es++;
11677         }
11678         else if (strchr(S_PAT_MODS, *s))
11679             pm->op_pmflags = S_pmflag(pm->op_pmflags, *s++);
11680         else
11681             break;
11682     }
11683
11684 #ifdef PERL_MAD
11685     if (PL_madskills) {
11686         if (modstart != s)
11687             curmad('m', newSVpvn(modstart, s - modstart));
11688         append_madprops(PL_thismad, (OP*)pm, 0);
11689         PL_thismad = 0;
11690     }
11691 #endif
11692     if ((pm->op_pmflags & PMf_CONTINUE)) {
11693         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
11694     }
11695
11696     if (es) {
11697         SV * const repl = newSVpvs("");
11698
11699         PL_sublex_info.super_bufptr = s;
11700         PL_sublex_info.super_bufend = PL_bufend;
11701         PL_multi_end = 0;
11702         pm->op_pmflags |= PMf_EVAL;
11703         while (es-- > 0) {
11704             if (es)
11705                 sv_catpvs(repl, "eval ");
11706             else
11707                 sv_catpvs(repl, "do ");
11708         }
11709         sv_catpvs(repl, "{");
11710         sv_catsv(repl, PL_lex_repl);
11711         if (strchr(SvPVX(PL_lex_repl), '#'))
11712             sv_catpvs(repl, "\n");
11713         sv_catpvs(repl, "}");
11714         SvEVALED_on(repl);
11715         SvREFCNT_dec(PL_lex_repl);
11716         PL_lex_repl = repl;
11717     }
11718
11719     PL_lex_op = (OP*)pm;
11720     pl_yylval.ival = OP_SUBST;
11721     return s;
11722 }
11723
11724 STATIC char *
11725 S_scan_trans(pTHX_ char *start)
11726 {
11727     dVAR;
11728     register char* s;
11729     OP *o;
11730     short *tbl;
11731     U8 squash;
11732     U8 del;
11733     U8 complement;
11734 #ifdef PERL_MAD
11735     char *modstart;
11736 #endif
11737
11738     PERL_ARGS_ASSERT_SCAN_TRANS;
11739
11740     pl_yylval.ival = OP_NULL;
11741
11742     s = scan_str(start,!!PL_madskills,FALSE);
11743     if (!s)
11744         Perl_croak(aTHX_ "Transliteration pattern not terminated");
11745
11746     if (s[-1] == PL_multi_open)
11747         s--;
11748 #ifdef PERL_MAD
11749     if (PL_madskills) {
11750         CURMAD('q', PL_thisopen);
11751         CURMAD('_', PL_thiswhite);
11752         CURMAD('E', PL_thisstuff);
11753         CURMAD('Q', PL_thisclose);
11754         PL_realtokenstart = s - SvPVX(PL_linestr);
11755     }
11756 #endif
11757
11758     s = scan_str(s,!!PL_madskills,FALSE);
11759     if (!s) {
11760         if (PL_lex_stuff) {
11761             SvREFCNT_dec(PL_lex_stuff);
11762             PL_lex_stuff = NULL;
11763         }
11764         Perl_croak(aTHX_ "Transliteration replacement not terminated");
11765     }
11766     if (PL_madskills) {
11767         CURMAD('z', PL_thisopen);
11768         CURMAD('R', PL_thisstuff);
11769         CURMAD('Z', PL_thisclose);
11770     }
11771
11772     complement = del = squash = 0;
11773 #ifdef PERL_MAD
11774     modstart = s;
11775 #endif
11776     while (1) {
11777         switch (*s) {
11778         case 'c':
11779             complement = OPpTRANS_COMPLEMENT;
11780             break;
11781         case 'd':
11782             del = OPpTRANS_DELETE;
11783             break;
11784         case 's':
11785             squash = OPpTRANS_SQUASH;
11786             break;
11787         default:
11788             goto no_more;
11789         }
11790         s++;
11791     }
11792   no_more:
11793
11794     tbl = (short *)PerlMemShared_calloc(complement&&!del?258:256, sizeof(short));
11795     o = newPVOP(OP_TRANS, 0, (char*)tbl);
11796     o->op_private &= ~OPpTRANS_ALL;
11797     o->op_private |= del|squash|complement|
11798       (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
11799       (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF   : 0);
11800
11801     PL_lex_op = o;
11802     pl_yylval.ival = OP_TRANS;
11803
11804 #ifdef PERL_MAD
11805     if (PL_madskills) {
11806         if (modstart != s)
11807             curmad('m', newSVpvn(modstart, s - modstart));
11808         append_madprops(PL_thismad, o, 0);
11809         PL_thismad = 0;
11810     }
11811 #endif
11812
11813     return s;
11814 }
11815
11816 STATIC char *
11817 S_scan_heredoc(pTHX_ register char *s)
11818 {
11819     dVAR;
11820     SV *herewas;
11821     I32 op_type = OP_SCALAR;
11822     I32 len;
11823     SV *tmpstr;
11824     char term;
11825     const char *found_newline;
11826     register char *d;
11827     register char *e;
11828     char *peek;
11829     const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
11830 #ifdef PERL_MAD
11831     I32 stuffstart = s - SvPVX(PL_linestr);
11832     char *tstart;
11833
11834     PL_realtokenstart = -1;
11835 #endif
11836
11837     PERL_ARGS_ASSERT_SCAN_HEREDOC;
11838
11839     s += 2;
11840     d = PL_tokenbuf;
11841     e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
11842     if (!outer)
11843         *d++ = '\n';
11844     peek = s;
11845     while (SPACE_OR_TAB(*peek))
11846         peek++;
11847     if (*peek == '`' || *peek == '\'' || *peek =='"') {
11848         s = peek;
11849         term = *s++;
11850         s = delimcpy(d, e, s, PL_bufend, term, &len);
11851         d += len;
11852         if (s < PL_bufend)
11853             s++;
11854     }
11855     else {
11856         if (*s == '\\')
11857             s++, term = '\'';
11858         else
11859             term = '"';
11860         if (!isALNUM_lazy_if(s,UTF))
11861             deprecate("bare << to mean <<\"\"");
11862         for (; isALNUM_lazy_if(s,UTF); s++) {
11863             if (d < e)
11864                 *d++ = *s;
11865         }
11866     }
11867     if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
11868         Perl_croak(aTHX_ "Delimiter for here document is too long");
11869     *d++ = '\n';
11870     *d = '\0';
11871     len = d - PL_tokenbuf;
11872
11873 #ifdef PERL_MAD
11874     if (PL_madskills) {
11875         tstart = PL_tokenbuf + !outer;
11876         PL_thisclose = newSVpvn(tstart, len - !outer);
11877         tstart = SvPVX(PL_linestr) + stuffstart;
11878         PL_thisopen = newSVpvn(tstart, s - tstart);
11879         stuffstart = s - SvPVX(PL_linestr);
11880     }
11881 #endif
11882 #ifndef PERL_STRICT_CR
11883     d = strchr(s, '\r');
11884     if (d) {
11885         char * const olds = s;
11886         s = d;
11887         while (s < PL_bufend) {
11888             if (*s == '\r') {
11889                 *d++ = '\n';
11890                 if (*++s == '\n')
11891                     s++;
11892             }
11893             else if (*s == '\n' && s[1] == '\r') {      /* \015\013 on a mac? */
11894                 *d++ = *s++;
11895                 s++;
11896             }
11897             else
11898                 *d++ = *s++;
11899         }
11900         *d = '\0';
11901         PL_bufend = d;
11902         SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
11903         s = olds;
11904     }
11905 #endif
11906 #ifdef PERL_MAD
11907     found_newline = 0;
11908 #endif
11909     if ( outer || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s)) ) {
11910         herewas = newSVpvn(s,PL_bufend-s);
11911     }
11912     else {
11913 #ifdef PERL_MAD
11914         herewas = newSVpvn(s-1,found_newline-s+1);
11915 #else
11916         s--;
11917         herewas = newSVpvn(s,found_newline-s);
11918 #endif
11919     }
11920 #ifdef PERL_MAD
11921     if (PL_madskills) {
11922         tstart = SvPVX(PL_linestr) + stuffstart;
11923         if (PL_thisstuff)
11924             sv_catpvn(PL_thisstuff, tstart, s - tstart);
11925         else
11926             PL_thisstuff = newSVpvn(tstart, s - tstart);
11927     }
11928 #endif
11929     s += SvCUR(herewas);
11930
11931 #ifdef PERL_MAD
11932     stuffstart = s - SvPVX(PL_linestr);
11933
11934     if (found_newline)
11935         s--;
11936 #endif
11937
11938     tmpstr = newSV_type(SVt_PVIV);
11939     SvGROW(tmpstr, 80);
11940     if (term == '\'') {
11941         op_type = OP_CONST;
11942         SvIV_set(tmpstr, -1);
11943     }
11944     else if (term == '`') {
11945         op_type = OP_BACKTICK;
11946         SvIV_set(tmpstr, '\\');
11947     }
11948
11949     CLINE;
11950     PL_multi_start = CopLINE(PL_curcop);
11951     PL_multi_open = PL_multi_close = '<';
11952     term = *PL_tokenbuf;
11953     if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
11954         char * const bufptr = PL_sublex_info.super_bufptr;
11955         char * const bufend = PL_sublex_info.super_bufend;
11956         char * const olds = s - SvCUR(herewas);
11957         s = strchr(bufptr, '\n');
11958         if (!s)
11959             s = bufend;
11960         d = s;
11961         while (s < bufend &&
11962           (*s != term || memNE(s,PL_tokenbuf,len)) ) {
11963             if (*s++ == '\n')
11964                 CopLINE_inc(PL_curcop);
11965         }
11966         if (s >= bufend) {
11967             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11968             missingterm(PL_tokenbuf);
11969         }
11970         sv_setpvn(herewas,bufptr,d-bufptr+1);
11971         sv_setpvn(tmpstr,d+1,s-d);
11972         s += len - 1;
11973         sv_catpvn(herewas,s,bufend-s);
11974         Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
11975
11976         s = olds;
11977         goto retval;
11978     }
11979     else if (!outer) {
11980         d = s;
11981         while (s < PL_bufend &&
11982           (*s != term || memNE(s,PL_tokenbuf,len)) ) {
11983             if (*s++ == '\n')
11984                 CopLINE_inc(PL_curcop);
11985         }
11986         if (s >= PL_bufend) {
11987             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11988             missingterm(PL_tokenbuf);
11989         }
11990         sv_setpvn(tmpstr,d+1,s-d);
11991 #ifdef PERL_MAD
11992         if (PL_madskills) {
11993             if (PL_thisstuff)
11994                 sv_catpvn(PL_thisstuff, d + 1, s - d);
11995             else
11996                 PL_thisstuff = newSVpvn(d + 1, s - d);
11997             stuffstart = s - SvPVX(PL_linestr);
11998         }
11999 #endif
12000         s += len - 1;
12001         CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
12002
12003         sv_catpvn(herewas,s,PL_bufend-s);
12004         sv_setsv(PL_linestr,herewas);
12005         PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
12006         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
12007         PL_last_lop = PL_last_uni = NULL;
12008     }
12009     else
12010         sv_setpvs(tmpstr,"");   /* avoid "uninitialized" warning */
12011     while (s >= PL_bufend) {    /* multiple line string? */
12012 #ifdef PERL_MAD
12013         if (PL_madskills) {
12014             tstart = SvPVX(PL_linestr) + stuffstart;
12015             if (PL_thisstuff)
12016                 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
12017             else
12018                 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
12019         }
12020 #endif
12021         PL_bufptr = s;
12022         CopLINE_inc(PL_curcop);
12023         if (!outer || !lex_next_chunk(0)) {
12024             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
12025             missingterm(PL_tokenbuf);
12026         }
12027         CopLINE_dec(PL_curcop);
12028         s = PL_bufptr;
12029 #ifdef PERL_MAD
12030         stuffstart = s - SvPVX(PL_linestr);
12031 #endif
12032         CopLINE_inc(PL_curcop);
12033         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
12034         PL_last_lop = PL_last_uni = NULL;
12035 #ifndef PERL_STRICT_CR
12036         if (PL_bufend - PL_linestart >= 2) {
12037             if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
12038                 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
12039             {
12040                 PL_bufend[-2] = '\n';
12041                 PL_bufend--;
12042                 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
12043             }
12044             else if (PL_bufend[-1] == '\r')
12045                 PL_bufend[-1] = '\n';
12046         }
12047         else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
12048             PL_bufend[-1] = '\n';
12049 #endif
12050         if (*s == term && memEQ(s,PL_tokenbuf,len)) {
12051             STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
12052             *(SvPVX(PL_linestr) + off ) = ' ';
12053             sv_catsv(PL_linestr,herewas);
12054             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
12055             s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
12056         }
12057         else {
12058             s = PL_bufend;
12059             sv_catsv(tmpstr,PL_linestr);
12060         }
12061     }
12062     s++;
12063 retval:
12064     PL_multi_end = CopLINE(PL_curcop);
12065     if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
12066         SvPV_shrink_to_cur(tmpstr);
12067     }
12068     SvREFCNT_dec(herewas);
12069     if (!IN_BYTES) {
12070         if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
12071             SvUTF8_on(tmpstr);
12072         else if (PL_encoding)
12073             sv_recode_to_utf8(tmpstr, PL_encoding);
12074     }
12075     PL_lex_stuff = tmpstr;
12076     pl_yylval.ival = op_type;
12077     return s;
12078 }
12079
12080 /* scan_inputsymbol
12081    takes: current position in input buffer
12082    returns: new position in input buffer
12083    side-effects: pl_yylval and lex_op are set.
12084
12085    This code handles:
12086
12087    <>           read from ARGV
12088    <FH>         read from filehandle
12089    <pkg::FH>    read from package qualified filehandle
12090    <pkg'FH>     read from package qualified filehandle
12091    <$fh>        read from filehandle in $fh
12092    <*.h>        filename glob
12093
12094 */
12095
12096 STATIC char *
12097 S_scan_inputsymbol(pTHX_ char *start)
12098 {
12099     dVAR;
12100     register char *s = start;           /* current position in buffer */
12101     char *end;
12102     I32 len;
12103     char *d = PL_tokenbuf;                                      /* start of temp holding space */
12104     const char * const e = PL_tokenbuf + sizeof PL_tokenbuf;    /* end of temp holding space */
12105
12106     PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
12107
12108     end = strchr(s, '\n');
12109     if (!end)
12110         end = PL_bufend;
12111     s = delimcpy(d, e, s + 1, end, '>', &len);  /* extract until > */
12112
12113     /* die if we didn't have space for the contents of the <>,
12114        or if it didn't end, or if we see a newline
12115     */
12116
12117     if (len >= (I32)sizeof PL_tokenbuf)
12118         Perl_croak(aTHX_ "Excessively long <> operator");
12119     if (s >= end)
12120         Perl_croak(aTHX_ "Unterminated <> operator");
12121
12122     s++;
12123
12124     /* check for <$fh>
12125        Remember, only scalar variables are interpreted as filehandles by
12126        this code.  Anything more complex (e.g., <$fh{$num}>) will be
12127        treated as a glob() call.
12128        This code makes use of the fact that except for the $ at the front,
12129        a scalar variable and a filehandle look the same.
12130     */
12131     if (*d == '$' && d[1]) d++;
12132
12133     /* allow <Pkg'VALUE> or <Pkg::VALUE> */
12134     while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
12135         d++;
12136
12137     /* If we've tried to read what we allow filehandles to look like, and
12138        there's still text left, then it must be a glob() and not a getline.
12139        Use scan_str to pull out the stuff between the <> and treat it
12140        as nothing more than a string.
12141     */
12142
12143     if (d - PL_tokenbuf != len) {
12144         pl_yylval.ival = OP_GLOB;
12145         s = scan_str(start,!!PL_madskills,FALSE);
12146         if (!s)
12147            Perl_croak(aTHX_ "Glob not terminated");
12148         return s;
12149     }
12150     else {
12151         bool readline_overriden = FALSE;
12152         GV *gv_readline;
12153         GV **gvp;
12154         /* we're in a filehandle read situation */
12155         d = PL_tokenbuf;
12156
12157         /* turn <> into <ARGV> */
12158         if (!len)
12159             Copy("ARGV",d,5,char);
12160
12161         /* Check whether readline() is overriden */
12162         gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
12163         if ((gv_readline
12164                 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
12165                 ||
12166                 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
12167                  && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
12168                 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
12169             readline_overriden = TRUE;
12170
12171         /* if <$fh>, create the ops to turn the variable into a
12172            filehandle
12173         */
12174         if (*d == '$') {
12175             /* try to find it in the pad for this block, otherwise find
12176                add symbol table ops
12177             */
12178             const PADOFFSET tmp = pad_findmy(d, len, 0);
12179             if (tmp != NOT_IN_PAD) {
12180                 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
12181                     HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
12182                     HEK * const stashname = HvNAME_HEK(stash);
12183                     SV * const sym = sv_2mortal(newSVhek(stashname));
12184                     sv_catpvs(sym, "::");
12185                     sv_catpv(sym, d+1);
12186                     d = SvPVX(sym);
12187                     goto intro_sym;
12188                 }
12189                 else {
12190                     OP * const o = newOP(OP_PADSV, 0);
12191                     o->op_targ = tmp;
12192                     PL_lex_op = readline_overriden
12193                         ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12194                                 append_elem(OP_LIST, o,
12195                                     newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
12196                         : (OP*)newUNOP(OP_READLINE, 0, o);
12197                 }
12198             }
12199             else {
12200                 GV *gv;
12201                 ++d;
12202 intro_sym:
12203                 gv = gv_fetchpv(d,
12204                                 (PL_in_eval
12205                                  ? (GV_ADDMULTI | GV_ADDINEVAL)
12206                                  : GV_ADDMULTI),
12207                                 SVt_PV);
12208                 PL_lex_op = readline_overriden
12209                     ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12210                             append_elem(OP_LIST,
12211                                 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
12212                                 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
12213                     : (OP*)newUNOP(OP_READLINE, 0,
12214                             newUNOP(OP_RV2SV, 0,
12215                                 newGVOP(OP_GV, 0, gv)));
12216             }
12217             if (!readline_overriden)
12218                 PL_lex_op->op_flags |= OPf_SPECIAL;
12219             /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
12220             pl_yylval.ival = OP_NULL;
12221         }
12222
12223         /* If it's none of the above, it must be a literal filehandle
12224            (<Foo::BAR> or <FOO>) so build a simple readline OP */
12225         else {
12226             GV * const gv = gv_fetchpv(d, GV_ADD, SVt_PVIO);
12227             PL_lex_op = readline_overriden
12228                 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12229                         append_elem(OP_LIST,
12230                             newGVOP(OP_GV, 0, gv),
12231                             newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
12232                 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
12233             pl_yylval.ival = OP_NULL;
12234         }
12235     }
12236
12237     return s;
12238 }
12239
12240
12241 /* scan_str
12242    takes: start position in buffer
12243           keep_quoted preserve \ on the embedded delimiter(s)
12244           keep_delims preserve the delimiters around the string
12245    returns: position to continue reading from buffer
12246    side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
12247         updates the read buffer.
12248
12249    This subroutine pulls a string out of the input.  It is called for:
12250         q               single quotes           q(literal text)
12251         '               single quotes           'literal text'
12252         qq              double quotes           qq(interpolate $here please)
12253         "               double quotes           "interpolate $here please"
12254         qx              backticks               qx(/bin/ls -l)
12255         `               backticks               `/bin/ls -l`
12256         qw              quote words             @EXPORT_OK = qw( func() $spam )
12257         m//             regexp match            m/this/
12258         s///            regexp substitute       s/this/that/
12259         tr///           string transliterate    tr/this/that/
12260         y///            string transliterate    y/this/that/
12261         ($*@)           sub prototypes          sub foo ($)
12262         (stuff)         sub attr parameters     sub foo : attr(stuff)
12263         <>              readline or globs       <FOO>, <>, <$fh>, or <*.c>
12264
12265    In most of these cases (all but <>, patterns and transliterate)
12266    yylex() calls scan_str().  m// makes yylex() call scan_pat() which
12267    calls scan_str().  s/// makes yylex() call scan_subst() which calls
12268    scan_str().  tr/// and y/// make yylex() call scan_trans() which
12269    calls scan_str().
12270
12271    It skips whitespace before the string starts, and treats the first
12272    character as the delimiter.  If the delimiter is one of ([{< then
12273    the corresponding "close" character )]}> is used as the closing
12274    delimiter.  It allows quoting of delimiters, and if the string has
12275    balanced delimiters ([{<>}]) it allows nesting.
12276
12277    On success, the SV with the resulting string is put into lex_stuff or,
12278    if that is already non-NULL, into lex_repl. The second case occurs only
12279    when parsing the RHS of the special constructs s/// and tr/// (y///).
12280    For convenience, the terminating delimiter character is stuffed into
12281    SvIVX of the SV.
12282 */
12283
12284 STATIC char *
12285 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
12286 {
12287     dVAR;
12288     SV *sv;                             /* scalar value: string */
12289     const char *tmps;                   /* temp string, used for delimiter matching */
12290     register char *s = start;           /* current position in the buffer */
12291     register char term;                 /* terminating character */
12292     register char *to;                  /* current position in the sv's data */
12293     I32 brackets = 1;                   /* bracket nesting level */
12294     bool has_utf8 = FALSE;              /* is there any utf8 content? */
12295     I32 termcode;                       /* terminating char. code */
12296     U8 termstr[UTF8_MAXBYTES];          /* terminating string */
12297     STRLEN termlen;                     /* length of terminating string */
12298     int last_off = 0;                   /* last position for nesting bracket */
12299 #ifdef PERL_MAD
12300     int stuffstart;
12301     char *tstart;
12302 #endif
12303
12304     PERL_ARGS_ASSERT_SCAN_STR;
12305
12306     /* skip space before the delimiter */
12307     if (isSPACE(*s)) {
12308         s = PEEKSPACE(s);
12309     }
12310
12311 #ifdef PERL_MAD
12312     if (PL_realtokenstart >= 0) {
12313         stuffstart = PL_realtokenstart;
12314         PL_realtokenstart = -1;
12315     }
12316     else
12317         stuffstart = start - SvPVX(PL_linestr);
12318 #endif
12319     /* mark where we are, in case we need to report errors */
12320     CLINE;
12321
12322     /* after skipping whitespace, the next character is the terminator */
12323     term = *s;
12324     if (!UTF) {
12325         termcode = termstr[0] = term;
12326         termlen = 1;
12327     }
12328     else {
12329         termcode = utf8_to_uvchr((U8*)s, &termlen);
12330         Copy(s, termstr, termlen, U8);
12331         if (!UTF8_IS_INVARIANT(term))
12332             has_utf8 = TRUE;
12333     }
12334
12335     /* mark where we are */
12336     PL_multi_start = CopLINE(PL_curcop);
12337     PL_multi_open = term;
12338
12339     /* find corresponding closing delimiter */
12340     if (term && (tmps = strchr("([{< )]}> )]}>",term)))
12341         termcode = termstr[0] = term = tmps[5];
12342
12343     PL_multi_close = term;
12344
12345     /* create a new SV to hold the contents.  79 is the SV's initial length.
12346        What a random number. */
12347     sv = newSV_type(SVt_PVIV);
12348     SvGROW(sv, 80);
12349     SvIV_set(sv, termcode);
12350     (void)SvPOK_only(sv);               /* validate pointer */
12351
12352     /* move past delimiter and try to read a complete string */
12353     if (keep_delims)
12354         sv_catpvn(sv, s, termlen);
12355     s += termlen;
12356 #ifdef PERL_MAD
12357     tstart = SvPVX(PL_linestr) + stuffstart;
12358     if (!PL_thisopen && !keep_delims) {
12359         PL_thisopen = newSVpvn(tstart, s - tstart);
12360         stuffstart = s - SvPVX(PL_linestr);
12361     }
12362 #endif
12363     for (;;) {
12364         if (PL_encoding && !UTF) {
12365             bool cont = TRUE;
12366
12367             while (cont) {
12368                 int offset = s - SvPVX_const(PL_linestr);
12369                 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
12370                                            &offset, (char*)termstr, termlen);
12371                 const char * const ns = SvPVX_const(PL_linestr) + offset;
12372                 char * const svlast = SvEND(sv) - 1;
12373
12374                 for (; s < ns; s++) {
12375                     if (*s == '\n' && !PL_rsfp)
12376                         CopLINE_inc(PL_curcop);
12377                 }
12378                 if (!found)
12379                     goto read_more_line;
12380                 else {
12381                     /* handle quoted delimiters */
12382                     if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
12383                         const char *t;
12384                         for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
12385                             t--;
12386                         if ((svlast-1 - t) % 2) {
12387                             if (!keep_quoted) {
12388                                 *(svlast-1) = term;
12389                                 *svlast = '\0';
12390                                 SvCUR_set(sv, SvCUR(sv) - 1);
12391                             }
12392                             continue;
12393                         }
12394                     }
12395                     if (PL_multi_open == PL_multi_close) {
12396                         cont = FALSE;
12397                     }
12398                     else {
12399                         const char *t;
12400                         char *w;
12401                         for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
12402                             /* At here, all closes are "was quoted" one,
12403                                so we don't check PL_multi_close. */
12404                             if (*t == '\\') {
12405                                 if (!keep_quoted && *(t+1) == PL_multi_open)
12406                                     t++;
12407                                 else
12408                                     *w++ = *t++;
12409                             }
12410                             else if (*t == PL_multi_open)
12411                                 brackets++;
12412
12413                             *w = *t;
12414                         }
12415                         if (w < t) {
12416                             *w++ = term;
12417                             *w = '\0';
12418                             SvCUR_set(sv, w - SvPVX_const(sv));
12419                         }
12420                         last_off = w - SvPVX(sv);
12421                         if (--brackets <= 0)
12422                             cont = FALSE;
12423                     }
12424                 }
12425             }
12426             if (!keep_delims) {
12427                 SvCUR_set(sv, SvCUR(sv) - 1);
12428                 *SvEND(sv) = '\0';
12429             }
12430             break;
12431         }
12432
12433         /* extend sv if need be */
12434         SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
12435         /* set 'to' to the next character in the sv's string */
12436         to = SvPVX(sv)+SvCUR(sv);
12437
12438         /* if open delimiter is the close delimiter read unbridle */
12439         if (PL_multi_open == PL_multi_close) {
12440             for (; s < PL_bufend; s++,to++) {
12441                 /* embedded newlines increment the current line number */
12442                 if (*s == '\n' && !PL_rsfp)
12443                     CopLINE_inc(PL_curcop);
12444                 /* handle quoted delimiters */
12445                 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
12446                     if (!keep_quoted && s[1] == term)
12447                         s++;
12448                 /* any other quotes are simply copied straight through */
12449                     else
12450                         *to++ = *s++;
12451                 }
12452                 /* terminate when run out of buffer (the for() condition), or
12453                    have found the terminator */
12454                 else if (*s == term) {
12455                     if (termlen == 1)
12456                         break;
12457                     if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
12458                         break;
12459                 }
12460                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
12461                     has_utf8 = TRUE;
12462                 *to = *s;
12463             }
12464         }
12465
12466         /* if the terminator isn't the same as the start character (e.g.,
12467            matched brackets), we have to allow more in the quoting, and
12468            be prepared for nested brackets.
12469         */
12470         else {
12471             /* read until we run out of string, or we find the terminator */
12472             for (; s < PL_bufend; s++,to++) {
12473                 /* embedded newlines increment the line count */
12474                 if (*s == '\n' && !PL_rsfp)
12475                     CopLINE_inc(PL_curcop);
12476                 /* backslashes can escape the open or closing characters */
12477                 if (*s == '\\' && s+1 < PL_bufend) {
12478                     if (!keep_quoted &&
12479                         ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
12480                         s++;
12481                     else
12482                         *to++ = *s++;
12483                 }
12484                 /* allow nested opens and closes */
12485                 else if (*s == PL_multi_close && --brackets <= 0)
12486                     break;
12487                 else if (*s == PL_multi_open)
12488                     brackets++;
12489                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
12490                     has_utf8 = TRUE;
12491                 *to = *s;
12492             }
12493         }
12494         /* terminate the copied string and update the sv's end-of-string */
12495         *to = '\0';
12496         SvCUR_set(sv, to - SvPVX_const(sv));
12497
12498         /*
12499          * this next chunk reads more into the buffer if we're not done yet
12500          */
12501
12502         if (s < PL_bufend)
12503             break;              /* handle case where we are done yet :-) */
12504
12505 #ifndef PERL_STRICT_CR
12506         if (to - SvPVX_const(sv) >= 2) {
12507             if ((to[-2] == '\r' && to[-1] == '\n') ||
12508                 (to[-2] == '\n' && to[-1] == '\r'))
12509             {
12510                 to[-2] = '\n';
12511                 to--;
12512                 SvCUR_set(sv, to - SvPVX_const(sv));
12513             }
12514             else if (to[-1] == '\r')
12515                 to[-1] = '\n';
12516         }
12517         else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
12518             to[-1] = '\n';
12519 #endif
12520
12521      read_more_line:
12522         /* if we're out of file, or a read fails, bail and reset the current
12523            line marker so we can report where the unterminated string began
12524         */
12525 #ifdef PERL_MAD
12526         if (PL_madskills) {
12527             char * const tstart = SvPVX(PL_linestr) + stuffstart;
12528             if (PL_thisstuff)
12529                 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
12530             else
12531                 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
12532         }
12533 #endif
12534         CopLINE_inc(PL_curcop);
12535         PL_bufptr = PL_bufend;
12536         if (!lex_next_chunk(0)) {
12537             sv_free(sv);
12538             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
12539             return NULL;
12540         }
12541         s = PL_bufptr;
12542 #ifdef PERL_MAD
12543         stuffstart = 0;
12544 #endif
12545     }
12546
12547     /* at this point, we have successfully read the delimited string */
12548
12549     if (!PL_encoding || UTF) {
12550 #ifdef PERL_MAD
12551         if (PL_madskills) {
12552             char * const tstart = SvPVX(PL_linestr) + stuffstart;
12553             const int len = s - tstart;
12554             if (PL_thisstuff)
12555                 sv_catpvn(PL_thisstuff, tstart, len);
12556             else
12557                 PL_thisstuff = newSVpvn(tstart, len);
12558             if (!PL_thisclose && !keep_delims)
12559                 PL_thisclose = newSVpvn(s,termlen);
12560         }
12561 #endif
12562
12563         if (keep_delims)
12564             sv_catpvn(sv, s, termlen);
12565         s += termlen;
12566     }
12567 #ifdef PERL_MAD
12568     else {
12569         if (PL_madskills) {
12570             char * const tstart = SvPVX(PL_linestr) + stuffstart;
12571             const int len = s - tstart - termlen;
12572             if (PL_thisstuff)
12573                 sv_catpvn(PL_thisstuff, tstart, len);
12574             else
12575                 PL_thisstuff = newSVpvn(tstart, len);
12576             if (!PL_thisclose && !keep_delims)
12577                 PL_thisclose = newSVpvn(s - termlen,termlen);
12578         }
12579     }
12580 #endif
12581     if (has_utf8 || PL_encoding)
12582         SvUTF8_on(sv);
12583
12584     PL_multi_end = CopLINE(PL_curcop);
12585
12586     /* if we allocated too much space, give some back */
12587     if (SvCUR(sv) + 5 < SvLEN(sv)) {
12588         SvLEN_set(sv, SvCUR(sv) + 1);
12589         SvPV_renew(sv, SvLEN(sv));
12590     }
12591
12592     /* decide whether this is the first or second quoted string we've read
12593        for this op
12594     */
12595
12596     if (PL_lex_stuff)
12597         PL_lex_repl = sv;
12598     else
12599         PL_lex_stuff = sv;
12600     return s;
12601 }
12602
12603 /*
12604   scan_num
12605   takes: pointer to position in buffer
12606   returns: pointer to new position in buffer
12607   side-effects: builds ops for the constant in pl_yylval.op
12608
12609   Read a number in any of the formats that Perl accepts:
12610
12611   \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*)      12 12.34 12.
12612   \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*)                     .34
12613   0b[01](_?[01])*
12614   0[0-7](_?[0-7])*
12615   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
12616
12617   Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
12618   thing it reads.
12619
12620   If it reads a number without a decimal point or an exponent, it will
12621   try converting the number to an integer and see if it can do so
12622   without loss of precision.
12623 */
12624
12625 char *
12626 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
12627 {
12628     dVAR;
12629     register const char *s = start;     /* current position in buffer */
12630     register char *d;                   /* destination in temp buffer */
12631     register char *e;                   /* end of temp buffer */
12632     NV nv;                              /* number read, as a double */
12633     SV *sv = NULL;                      /* place to put the converted number */
12634     bool floatit;                       /* boolean: int or float? */
12635     const char *lastub = NULL;          /* position of last underbar */
12636     static char const number_too_long[] = "Number too long";
12637
12638     PERL_ARGS_ASSERT_SCAN_NUM;
12639
12640     /* We use the first character to decide what type of number this is */
12641
12642     switch (*s) {
12643     default:
12644       Perl_croak(aTHX_ "panic: scan_num");
12645
12646     /* if it starts with a 0, it could be an octal number, a decimal in
12647        0.13 disguise, or a hexadecimal number, or a binary number. */
12648     case '0':
12649         {
12650           /* variables:
12651              u          holds the "number so far"
12652              shift      the power of 2 of the base
12653                         (hex == 4, octal == 3, binary == 1)
12654              overflowed was the number more than we can hold?
12655
12656              Shift is used when we add a digit.  It also serves as an "are
12657              we in octal/hex/binary?" indicator to disallow hex characters
12658              when in octal mode.
12659            */
12660             NV n = 0.0;
12661             UV u = 0;
12662             I32 shift;
12663             bool overflowed = FALSE;
12664             bool just_zero  = TRUE;     /* just plain 0 or binary number? */
12665             static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
12666             static const char* const bases[5] =
12667               { "", "binary", "", "octal", "hexadecimal" };
12668             static const char* const Bases[5] =
12669               { "", "Binary", "", "Octal", "Hexadecimal" };
12670             static const char* const maxima[5] =
12671               { "",
12672                 "0b11111111111111111111111111111111",
12673                 "",
12674                 "037777777777",
12675                 "0xffffffff" };
12676             const char *base, *Base, *max;
12677
12678             /* check for hex */
12679             if (s[1] == 'x') {
12680                 shift = 4;
12681                 s += 2;
12682                 just_zero = FALSE;
12683             } else if (s[1] == 'b') {
12684                 shift = 1;
12685                 s += 2;
12686                 just_zero = FALSE;
12687             }
12688             /* check for a decimal in disguise */
12689             else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
12690                 goto decimal;
12691             /* so it must be octal */
12692             else {
12693                 shift = 3;
12694                 s++;
12695             }
12696
12697             if (*s == '_') {
12698                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12699                                "Misplaced _ in number");
12700                lastub = s++;
12701             }
12702
12703             base = bases[shift];
12704             Base = Bases[shift];
12705             max  = maxima[shift];
12706
12707             /* read the rest of the number */
12708             for (;;) {
12709                 /* x is used in the overflow test,
12710                    b is the digit we're adding on. */
12711                 UV x, b;
12712
12713                 switch (*s) {
12714
12715                 /* if we don't mention it, we're done */
12716                 default:
12717                     goto out;
12718
12719                 /* _ are ignored -- but warned about if consecutive */
12720                 case '_':
12721                     if (lastub && s == lastub + 1)
12722                         Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12723                                        "Misplaced _ in number");
12724                     lastub = s++;
12725                     break;
12726
12727                 /* 8 and 9 are not octal */
12728                 case '8': case '9':
12729                     if (shift == 3)
12730                         yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
12731                     /* FALL THROUGH */
12732
12733                 /* octal digits */
12734                 case '2': case '3': case '4':
12735                 case '5': case '6': case '7':
12736                     if (shift == 1)
12737                         yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
12738                     /* FALL THROUGH */
12739
12740                 case '0': case '1':
12741                     b = *s++ & 15;              /* ASCII digit -> value of digit */
12742                     goto digit;
12743
12744                 /* hex digits */
12745                 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
12746                 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
12747                     /* make sure they said 0x */
12748                     if (shift != 4)
12749                         goto out;
12750                     b = (*s++ & 7) + 9;
12751
12752                     /* Prepare to put the digit we have onto the end
12753                        of the number so far.  We check for overflows.
12754                     */
12755
12756                   digit:
12757                     just_zero = FALSE;
12758                     if (!overflowed) {
12759                         x = u << shift; /* make room for the digit */
12760
12761                         if ((x >> shift) != u
12762                             && !(PL_hints & HINT_NEW_BINARY)) {
12763                             overflowed = TRUE;
12764                             n = (NV) u;
12765                             Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
12766                                              "Integer overflow in %s number",
12767                                              base);
12768                         } else
12769                             u = x | b;          /* add the digit to the end */
12770                     }
12771                     if (overflowed) {
12772                         n *= nvshift[shift];
12773                         /* If an NV has not enough bits in its
12774                          * mantissa to represent an UV this summing of
12775                          * small low-order numbers is a waste of time
12776                          * (because the NV cannot preserve the
12777                          * low-order bits anyway): we could just
12778                          * remember when did we overflow and in the
12779                          * end just multiply n by the right
12780                          * amount. */
12781                         n += (NV) b;
12782                     }
12783                     break;
12784                 }
12785             }
12786
12787           /* if we get here, we had success: make a scalar value from
12788              the number.
12789           */
12790           out:
12791
12792             /* final misplaced underbar check */
12793             if (s[-1] == '_') {
12794                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
12795             }
12796
12797             sv = newSV(0);
12798             if (overflowed) {
12799                 if (n > 4294967295.0)
12800                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
12801                                    "%s number > %s non-portable",
12802                                    Base, max);
12803                 sv_setnv(sv, n);
12804             }
12805             else {
12806 #if UVSIZE > 4
12807                 if (u > 0xffffffff)
12808                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
12809                                    "%s number > %s non-portable",
12810                                    Base, max);
12811 #endif
12812                 sv_setuv(sv, u);
12813             }
12814             if (just_zero && (PL_hints & HINT_NEW_INTEGER))
12815                 sv = new_constant(start, s - start, "integer",
12816                                   sv, NULL, NULL, 0);
12817             else if (PL_hints & HINT_NEW_BINARY)
12818                 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
12819         }
12820         break;
12821
12822     /*
12823       handle decimal numbers.
12824       we're also sent here when we read a 0 as the first digit
12825     */
12826     case '1': case '2': case '3': case '4': case '5':
12827     case '6': case '7': case '8': case '9': case '.':
12828       decimal:
12829         d = PL_tokenbuf;
12830         e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
12831         floatit = FALSE;
12832
12833         /* read next group of digits and _ and copy into d */
12834         while (isDIGIT(*s) || *s == '_') {
12835             /* skip underscores, checking for misplaced ones
12836                if -w is on
12837             */
12838             if (*s == '_') {
12839                 if (lastub && s == lastub + 1)
12840                     Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12841                                    "Misplaced _ in number");
12842                 lastub = s++;
12843             }
12844             else {
12845                 /* check for end of fixed-length buffer */
12846                 if (d >= e)
12847                     Perl_croak(aTHX_ number_too_long);
12848                 /* if we're ok, copy the character */
12849                 *d++ = *s++;
12850             }
12851         }
12852
12853         /* final misplaced underbar check */
12854         if (lastub && s == lastub + 1) {
12855             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
12856         }
12857
12858         /* read a decimal portion if there is one.  avoid
12859            3..5 being interpreted as the number 3. followed
12860            by .5
12861         */
12862         if (*s == '.' && s[1] != '.') {
12863             floatit = TRUE;
12864             *d++ = *s++;
12865
12866             if (*s == '_') {
12867                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12868                                "Misplaced _ in number");
12869                 lastub = s;
12870             }
12871
12872             /* copy, ignoring underbars, until we run out of digits.
12873             */
12874             for (; isDIGIT(*s) || *s == '_'; s++) {
12875                 /* fixed length buffer check */
12876                 if (d >= e)
12877                     Perl_croak(aTHX_ number_too_long);
12878                 if (*s == '_') {
12879                    if (lastub && s == lastub + 1)
12880                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12881                                       "Misplaced _ in number");
12882                    lastub = s;
12883                 }
12884                 else
12885                     *d++ = *s;
12886             }
12887             /* fractional part ending in underbar? */
12888             if (s[-1] == '_') {
12889                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12890                                "Misplaced _ in number");
12891             }
12892             if (*s == '.' && isDIGIT(s[1])) {
12893                 /* oops, it's really a v-string, but without the "v" */
12894                 s = start;
12895                 goto vstring;
12896             }
12897         }
12898
12899         /* read exponent part, if present */
12900         if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
12901             floatit = TRUE;
12902             s++;
12903
12904             /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
12905             *d++ = 'e';         /* At least some Mach atof()s don't grok 'E' */
12906
12907             /* stray preinitial _ */
12908             if (*s == '_') {
12909                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12910                                "Misplaced _ in number");
12911                 lastub = s++;
12912             }
12913
12914             /* allow positive or negative exponent */
12915             if (*s == '+' || *s == '-')
12916                 *d++ = *s++;
12917
12918             /* stray initial _ */
12919             if (*s == '_') {
12920                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12921                                "Misplaced _ in number");
12922                 lastub = s++;
12923             }
12924
12925             /* read digits of exponent */
12926             while (isDIGIT(*s) || *s == '_') {
12927                 if (isDIGIT(*s)) {
12928                     if (d >= e)
12929                         Perl_croak(aTHX_ number_too_long);
12930                     *d++ = *s++;
12931                 }
12932                 else {
12933                    if (((lastub && s == lastub + 1) ||
12934                         (!isDIGIT(s[1]) && s[1] != '_')))
12935                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12936                                       "Misplaced _ in number");
12937                    lastub = s++;
12938                 }
12939             }
12940         }
12941
12942
12943         /* make an sv from the string */
12944         sv = newSV(0);
12945
12946         /*
12947            We try to do an integer conversion first if no characters
12948            indicating "float" have been found.
12949          */
12950
12951         if (!floatit) {
12952             UV uv;
12953             const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
12954
12955             if (flags == IS_NUMBER_IN_UV) {
12956               if (uv <= IV_MAX)
12957                 sv_setiv(sv, uv); /* Prefer IVs over UVs. */
12958               else
12959                 sv_setuv(sv, uv);
12960             } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
12961               if (uv <= (UV) IV_MIN)
12962                 sv_setiv(sv, -(IV)uv);
12963               else
12964                 floatit = TRUE;
12965             } else
12966               floatit = TRUE;
12967         }
12968         if (floatit) {
12969             /* terminate the string */
12970             *d = '\0';
12971             nv = Atof(PL_tokenbuf);
12972             sv_setnv(sv, nv);
12973         }
12974
12975         if ( floatit
12976              ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
12977             const char *const key = floatit ? "float" : "integer";
12978             const STRLEN keylen = floatit ? 5 : 7;
12979             sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
12980                                 key, keylen, sv, NULL, NULL, 0);
12981         }
12982         break;
12983
12984     /* if it starts with a v, it could be a v-string */
12985     case 'v':
12986 vstring:
12987                 sv = newSV(5); /* preallocate storage space */
12988                 s = scan_vstring(s, PL_bufend, sv);
12989         break;
12990     }
12991
12992     /* make the op for the constant and return */
12993
12994     if (sv)
12995         lvalp->opval = newSVOP(OP_CONST, 0, sv);
12996     else
12997         lvalp->opval = NULL;
12998
12999     return (char *)s;
13000 }
13001
13002 STATIC char *
13003 S_scan_formline(pTHX_ register char *s)
13004 {
13005     dVAR;
13006     register char *eol;
13007     register char *t;
13008     SV * const stuff = newSVpvs("");
13009     bool needargs = FALSE;
13010     bool eofmt = FALSE;
13011 #ifdef PERL_MAD
13012     char *tokenstart = s;
13013     SV* savewhite = NULL;
13014
13015     if (PL_madskills) {
13016         savewhite = PL_thiswhite;
13017         PL_thiswhite = 0;
13018     }
13019 #endif
13020
13021     PERL_ARGS_ASSERT_SCAN_FORMLINE;
13022
13023     while (!needargs) {
13024         if (*s == '.') {
13025             t = s+1;
13026 #ifdef PERL_STRICT_CR
13027             while (SPACE_OR_TAB(*t))
13028                 t++;
13029 #else
13030             while (SPACE_OR_TAB(*t) || *t == '\r')
13031                 t++;
13032 #endif
13033             if (*t == '\n' || t == PL_bufend) {
13034                 eofmt = TRUE;
13035                 break;
13036             }
13037         }
13038         if (PL_in_eval && !PL_rsfp) {
13039             eol = (char *) memchr(s,'\n',PL_bufend-s);
13040             if (!eol++)
13041                 eol = PL_bufend;
13042         }
13043         else
13044             eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
13045         if (*s != '#') {
13046             for (t = s; t < eol; t++) {
13047                 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
13048                     needargs = FALSE;
13049                     goto enough;        /* ~~ must be first line in formline */
13050                 }
13051                 if (*t == '@' || *t == '^')
13052                     needargs = TRUE;
13053             }
13054             if (eol > s) {
13055                 sv_catpvn(stuff, s, eol-s);
13056 #ifndef PERL_STRICT_CR
13057                 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
13058                     char *end = SvPVX(stuff) + SvCUR(stuff);
13059                     end[-2] = '\n';
13060                     end[-1] = '\0';
13061                     SvCUR_set(stuff, SvCUR(stuff) - 1);
13062                 }
13063 #endif
13064             }
13065             else
13066               break;
13067         }
13068         s = (char*)eol;
13069         if (PL_rsfp) {
13070             bool got_some;
13071 #ifdef PERL_MAD
13072             if (PL_madskills) {
13073                 if (PL_thistoken)
13074                     sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
13075                 else
13076                     PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
13077             }
13078 #endif
13079             PL_bufptr = PL_bufend;
13080             CopLINE_inc(PL_curcop);
13081             got_some = lex_next_chunk(0);
13082             CopLINE_dec(PL_curcop);
13083             s = PL_bufptr;
13084 #ifdef PERL_MAD
13085             tokenstart = PL_bufptr;
13086 #endif
13087             if (!got_some)
13088                 break;
13089         }
13090         incline(s);
13091     }
13092   enough:
13093     if (SvCUR(stuff)) {
13094         PL_expect = XTERM;
13095         if (needargs) {
13096             PL_lex_state = LEX_NORMAL;
13097             start_force(PL_curforce);
13098             NEXTVAL_NEXTTOKE.ival = 0;
13099             force_next(',');
13100         }
13101         else
13102             PL_lex_state = LEX_FORMLINE;
13103         if (!IN_BYTES) {
13104             if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
13105                 SvUTF8_on(stuff);
13106             else if (PL_encoding)
13107                 sv_recode_to_utf8(stuff, PL_encoding);
13108         }
13109         start_force(PL_curforce);
13110         NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
13111         force_next(THING);
13112         start_force(PL_curforce);
13113         NEXTVAL_NEXTTOKE.ival = OP_FORMLINE;
13114         force_next(LSTOP);
13115     }
13116     else {
13117         SvREFCNT_dec(stuff);
13118         if (eofmt)
13119             PL_lex_formbrack = 0;
13120         PL_bufptr = s;
13121     }
13122 #ifdef PERL_MAD
13123     if (PL_madskills) {
13124         if (PL_thistoken)
13125             sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
13126         else
13127             PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
13128         PL_thiswhite = savewhite;
13129     }
13130 #endif
13131     return s;
13132 }
13133
13134 I32
13135 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
13136 {
13137     dVAR;
13138     const I32 oldsavestack_ix = PL_savestack_ix;
13139     CV* const outsidecv = PL_compcv;
13140
13141     if (PL_compcv) {
13142         assert(SvTYPE(PL_compcv) == SVt_PVCV);
13143     }
13144     SAVEI32(PL_subline);
13145     save_item(PL_subname);
13146     SAVESPTR(PL_compcv);
13147
13148     PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
13149     CvFLAGS(PL_compcv) |= flags;
13150
13151     PL_subline = CopLINE(PL_curcop);
13152     CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
13153     CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
13154     CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
13155
13156     return oldsavestack_ix;
13157 }
13158
13159 #ifdef __SC__
13160 #pragma segment Perl_yylex
13161 #endif
13162 static int
13163 S_yywarn(pTHX_ const char *const s)
13164 {
13165     dVAR;
13166
13167     PERL_ARGS_ASSERT_YYWARN;
13168
13169     PL_in_eval |= EVAL_WARNONLY;
13170     yyerror(s);
13171     PL_in_eval &= ~EVAL_WARNONLY;
13172     return 0;
13173 }
13174
13175 int
13176 Perl_yyerror(pTHX_ const char *const s)
13177 {
13178     dVAR;
13179     const char *where = NULL;
13180     const char *context = NULL;
13181     int contlen = -1;
13182     SV *msg;
13183     int yychar  = PL_parser->yychar;
13184
13185     PERL_ARGS_ASSERT_YYERROR;
13186
13187     if (!yychar || (yychar == ';' && !PL_rsfp))
13188         where = "at EOF";
13189     else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
13190       PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
13191       PL_oldbufptr != PL_bufptr) {
13192         /*
13193                 Only for NetWare:
13194                 The code below is removed for NetWare because it abends/crashes on NetWare
13195                 when the script has error such as not having the closing quotes like:
13196                     if ($var eq "value)
13197                 Checking of white spaces is anyway done in NetWare code.
13198         */
13199 #ifndef NETWARE
13200         while (isSPACE(*PL_oldoldbufptr))
13201             PL_oldoldbufptr++;
13202 #endif
13203         context = PL_oldoldbufptr;
13204         contlen = PL_bufptr - PL_oldoldbufptr;
13205     }
13206     else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
13207       PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
13208         /*
13209                 Only for NetWare:
13210                 The code below is removed for NetWare because it abends/crashes on NetWare
13211                 when the script has error such as not having the closing quotes like:
13212                     if ($var eq "value)
13213                 Checking of white spaces is anyway done in NetWare code.
13214         */
13215 #ifndef NETWARE
13216         while (isSPACE(*PL_oldbufptr))
13217             PL_oldbufptr++;
13218 #endif
13219         context = PL_oldbufptr;
13220         contlen = PL_bufptr - PL_oldbufptr;
13221     }
13222     else if (yychar > 255)
13223         where = "next token ???";
13224     else if (yychar == -2) { /* YYEMPTY */
13225         if (PL_lex_state == LEX_NORMAL ||
13226            (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
13227             where = "at end of line";
13228         else if (PL_lex_inpat)
13229             where = "within pattern";
13230         else
13231             where = "within string";
13232     }
13233     else {
13234         SV * const where_sv = newSVpvs_flags("next char ", SVs_TEMP);
13235         if (yychar < 32)
13236             Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
13237         else if (isPRINT_LC(yychar)) {
13238             const char string = yychar;
13239             sv_catpvn(where_sv, &string, 1);
13240         }
13241         else
13242             Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
13243         where = SvPVX_const(where_sv);
13244     }
13245     msg = sv_2mortal(newSVpv(s, 0));
13246     Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
13247         OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
13248     if (context)
13249         Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
13250     else
13251         Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
13252     if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
13253         Perl_sv_catpvf(aTHX_ msg,
13254         "  (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
13255                 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
13256         PL_multi_end = 0;
13257     }
13258     if (PL_in_eval & EVAL_WARNONLY) {
13259         Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
13260     }
13261     else
13262         qerror(msg);
13263     if (PL_error_count >= 10) {
13264         if (PL_in_eval && SvCUR(ERRSV))
13265             Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
13266                        SVfARG(ERRSV), OutCopFILE(PL_curcop));
13267         else
13268             Perl_croak(aTHX_ "%s has too many errors.\n",
13269             OutCopFILE(PL_curcop));
13270     }
13271     PL_in_my = 0;
13272     PL_in_my_stash = NULL;
13273     return 0;
13274 }
13275 #ifdef __SC__
13276 #pragma segment Main
13277 #endif
13278
13279 STATIC char*
13280 S_swallow_bom(pTHX_ U8 *s)
13281 {
13282     dVAR;
13283     const STRLEN slen = SvCUR(PL_linestr);
13284
13285     PERL_ARGS_ASSERT_SWALLOW_BOM;
13286
13287     switch (s[0]) {
13288     case 0xFF:
13289         if (s[1] == 0xFE) {
13290             /* UTF-16 little-endian? (or UTF-32LE?) */
13291             if (s[2] == 0 && s[3] == 0)  /* UTF-32 little-endian */
13292                 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
13293 #ifndef PERL_NO_UTF16_FILTER
13294             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
13295             s += 2;
13296             if (PL_bufend > (char*)s) {
13297                 s = add_utf16_textfilter(s, TRUE);
13298             }
13299 #else
13300             Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
13301 #endif
13302         }
13303         break;
13304     case 0xFE:
13305         if (s[1] == 0xFF) {   /* UTF-16 big-endian? */
13306 #ifndef PERL_NO_UTF16_FILTER
13307             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
13308             s += 2;
13309             if (PL_bufend > (char *)s) {
13310                 s = add_utf16_textfilter(s, FALSE);
13311             }
13312 #else
13313             Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
13314 #endif
13315         }
13316         break;
13317     case 0xEF:
13318         if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
13319             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
13320             s += 3;                      /* UTF-8 */
13321         }
13322         break;
13323     case 0:
13324         if (slen > 3) {
13325              if (s[1] == 0) {
13326                   if (s[2] == 0xFE && s[3] == 0xFF) {
13327                        /* UTF-32 big-endian */
13328                        Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
13329                   }
13330              }
13331              else if (s[2] == 0 && s[3] != 0) {
13332                   /* Leading bytes
13333                    * 00 xx 00 xx
13334                    * are a good indicator of UTF-16BE. */
13335 #ifndef PERL_NO_UTF16_FILTER
13336                   if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
13337                   s = add_utf16_textfilter(s, FALSE);
13338 #else
13339                   Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
13340 #endif
13341              }
13342         }
13343 #ifdef EBCDIC
13344     case 0xDD:
13345         if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
13346             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
13347             s += 4;                      /* UTF-8 */
13348         }
13349         break;
13350 #endif
13351
13352     default:
13353          if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
13354                   /* Leading bytes
13355                    * xx 00 xx 00
13356                    * are a good indicator of UTF-16LE. */
13357 #ifndef PERL_NO_UTF16_FILTER
13358               if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
13359               s = add_utf16_textfilter(s, TRUE);
13360 #else
13361               Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
13362 #endif
13363          }
13364     }
13365     return (char*)s;
13366 }
13367
13368
13369 #ifndef PERL_NO_UTF16_FILTER
13370 static I32
13371 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
13372 {
13373     dVAR;
13374     SV *const filter = FILTER_DATA(idx);
13375     /* We re-use this each time round, throwing the contents away before we
13376        return.  */
13377     SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
13378     SV *const utf8_buffer = filter;
13379     IV status = IoPAGE(filter);
13380     const bool reverse = (bool) IoLINES(filter);
13381     I32 retval;
13382
13383     /* As we're automatically added, at the lowest level, and hence only called
13384        from this file, we can be sure that we're not called in block mode. Hence
13385        don't bother writing code to deal with block mode.  */
13386     if (maxlen) {
13387         Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
13388     }
13389     if (status < 0) {
13390         Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
13391     }
13392     DEBUG_P(PerlIO_printf(Perl_debug_log,
13393                           "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
13394                           FPTR2DPTR(void *, S_utf16_textfilter),
13395                           reverse ? 'l' : 'b', idx, maxlen, status,
13396                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
13397
13398     while (1) {
13399         STRLEN chars;
13400         STRLEN have;
13401         I32 newlen;
13402         U8 *end;
13403         /* First, look in our buffer of existing UTF-8 data:  */
13404         char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
13405
13406         if (nl) {
13407             ++nl;
13408         } else if (status == 0) {
13409             /* EOF */
13410             IoPAGE(filter) = 0;
13411             nl = SvEND(utf8_buffer);
13412         }
13413         if (nl) {
13414             STRLEN got = nl - SvPVX(utf8_buffer);
13415             /* Did we have anything to append?  */
13416             retval = got != 0;
13417             sv_catpvn(sv, SvPVX(utf8_buffer), got);
13418             /* Everything else in this code works just fine if SVp_POK isn't
13419                set.  This, however, needs it, and we need it to work, else
13420                we loop infinitely because the buffer is never consumed.  */
13421             sv_chop(utf8_buffer, nl);
13422             break;
13423         }
13424
13425         /* OK, not a complete line there, so need to read some more UTF-16.
13426            Read an extra octect if the buffer currently has an odd number. */
13427         while (1) {
13428             if (status <= 0)
13429                 break;
13430             if (SvCUR(utf16_buffer) >= 2) {
13431                 /* Location of the high octet of the last complete code point.
13432                    Gosh, UTF-16 is a pain. All the benefits of variable length,
13433                    *coupled* with all the benefits of partial reads and
13434                    endianness.  */
13435                 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
13436                     + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
13437
13438                 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
13439                     break;
13440                 }
13441
13442                 /* We have the first half of a surrogate. Read more.  */
13443                 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
13444             }
13445
13446             status = FILTER_READ(idx + 1, utf16_buffer,
13447                                  160 + (SvCUR(utf16_buffer) & 1));
13448             DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
13449             DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
13450             if (status < 0) {
13451                 /* Error */
13452                 IoPAGE(filter) = status;
13453                 return status;
13454             }
13455         }
13456
13457         chars = SvCUR(utf16_buffer) >> 1;
13458         have = SvCUR(utf8_buffer);
13459         SvGROW(utf8_buffer, have + chars * 3 + 1);
13460
13461         if (reverse) {
13462             end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
13463                                          (U8*)SvPVX_const(utf8_buffer) + have,
13464                                          chars * 2, &newlen);
13465         } else {
13466             end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
13467                                 (U8*)SvPVX_const(utf8_buffer) + have,
13468                                 chars * 2, &newlen);
13469         }
13470         SvCUR_set(utf8_buffer, have + newlen);
13471         *end = '\0';
13472
13473         /* No need to keep this SV "well-formed" with a '\0' after the end, as
13474            it's private to us, and utf16_to_utf8{,reversed} take a
13475            (pointer,length) pair, rather than a NUL-terminated string.  */
13476         if(SvCUR(utf16_buffer) & 1) {
13477             *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
13478             SvCUR_set(utf16_buffer, 1);
13479         } else {
13480             SvCUR_set(utf16_buffer, 0);
13481         }
13482     }
13483     DEBUG_P(PerlIO_printf(Perl_debug_log,
13484                           "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
13485                           status,
13486                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
13487     DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
13488     return retval;
13489 }
13490
13491 static U8 *
13492 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
13493 {
13494     SV *filter = filter_add(S_utf16_textfilter, NULL);
13495
13496     IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
13497     sv_setpvs(filter, "");
13498     IoLINES(filter) = reversed;
13499     IoPAGE(filter) = 1; /* Not EOF */
13500
13501     /* Sadly, we have to return a valid pointer, come what may, so we have to
13502        ignore any error return from this.  */
13503     SvCUR_set(PL_linestr, 0);
13504     if (FILTER_READ(0, PL_linestr, 0)) {
13505         SvUTF8_on(PL_linestr);
13506     } else {
13507         SvUTF8_on(PL_linestr);
13508     }
13509     PL_bufend = SvEND(PL_linestr);
13510     return (U8*)SvPVX(PL_linestr);
13511 }
13512 #endif
13513
13514 /*
13515 Returns a pointer to the next character after the parsed
13516 vstring, as well as updating the passed in sv.
13517
13518 Function must be called like
13519
13520         sv = newSV(5);
13521         s = scan_vstring(s,e,sv);
13522
13523 where s and e are the start and end of the string.
13524 The sv should already be large enough to store the vstring
13525 passed in, for performance reasons.
13526
13527 */
13528
13529 char *
13530 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
13531 {
13532     dVAR;
13533     const char *pos = s;
13534     const char *start = s;
13535
13536     PERL_ARGS_ASSERT_SCAN_VSTRING;
13537
13538     if (*pos == 'v') pos++;  /* get past 'v' */
13539     while (pos < e && (isDIGIT(*pos) || *pos == '_'))
13540         pos++;
13541     if ( *pos != '.') {
13542         /* this may not be a v-string if followed by => */
13543         const char *next = pos;
13544         while (next < e && isSPACE(*next))
13545             ++next;
13546         if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
13547             /* return string not v-string */
13548             sv_setpvn(sv,(char *)s,pos-s);
13549             return (char *)pos;
13550         }
13551     }
13552
13553     if (!isALPHA(*pos)) {
13554         U8 tmpbuf[UTF8_MAXBYTES+1];
13555
13556         if (*s == 'v')
13557             s++;  /* get past 'v' */
13558
13559         sv_setpvs(sv, "");
13560
13561         for (;;) {
13562             /* this is atoi() that tolerates underscores */
13563             U8 *tmpend;
13564             UV rev = 0;
13565             const char *end = pos;
13566             UV mult = 1;
13567             while (--end >= s) {
13568                 if (*end != '_') {
13569                     const UV orev = rev;
13570                     rev += (*end - '0') * mult;
13571                     mult *= 10;
13572                     if (orev > rev)
13573                         Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
13574                                          "Integer overflow in decimal number");
13575                 }
13576             }
13577 #ifdef EBCDIC
13578             if (rev > 0x7FFFFFFF)
13579                  Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
13580 #endif
13581             /* Append native character for the rev point */
13582             tmpend = uvchr_to_utf8(tmpbuf, rev);
13583             sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
13584             if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
13585                  SvUTF8_on(sv);
13586             if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
13587                  s = ++pos;
13588             else {
13589                  s = pos;
13590                  break;
13591             }
13592             while (pos < e && (isDIGIT(*pos) || *pos == '_'))
13593                  pos++;
13594         }
13595         SvPOK_on(sv);
13596         sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
13597         SvRMAGICAL_on(sv);
13598     }
13599     return (char *)s;
13600 }
13601
13602 int
13603 Perl_keyword_plugin_standard(pTHX_
13604         char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
13605 {
13606     PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
13607     PERL_UNUSED_CONTEXT;
13608     PERL_UNUSED_ARG(keyword_ptr);
13609     PERL_UNUSED_ARG(keyword_len);
13610     PERL_UNUSED_ARG(op_ptr);
13611     return KEYWORD_PLUGIN_DECLINE;
13612 }
13613
13614 /*
13615  * Local variables:
13616  * c-indentation-style: bsd
13617  * c-basic-offset: 4
13618  * indent-tabs-mode: t
13619  * End:
13620  *
13621  * ex: set ts=8 sts=4 sw=4 noet:
13622  */