toke.c

   1 /*    toke.c
   2  *
   3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   4  *    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
   5  *
   6  *    You may distribute under the terms of either the GNU General Public
   7  *    License or the Artistic License, as specified in the README file.
   8  *
   9  */
  10
  11 /*
  12  *  'It all comes from here, the stench and the peril.'    --Frodo
  13  *
  14  *     [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
  15  */
  16
  17 /*
  18  * This file is the lexer for Perl.  It's closely linked to the
  19  * parser, perly.y.
  20  *
  21  * The main routine is yylex(), which returns the next token.
  22  */
  23
  24 /*
  25 =head1 Lexer interface
  26
  27 This is the lower layer of the Perl parser, managing characters and tokens.
  28
  29 =for apidoc AmU|yy_parser *|PL_parser
  30
  31 Pointer to a structure encapsulating the state of the parsing operation
  32 currently in progress.  The pointer can be locally changed to perform
  33 a nested parse without interfering with the state of an outer parse.
  34 Individual members of C<PL_parser> have their own documentation.
  35
  36 =cut
  37 */
  38
  39 #include "EXTERN.h"
  40 #define PERL_IN_TOKE_C
  41 #include "perl.h"
  42
  43 #define new_constant(a,b,c,d,e,f,g)     \
  44         S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
  45
  46 #define pl_yylval       (PL_parser->yylval)
  47
  48 /* YYINITDEPTH -- initial size of the parser's stacks.  */
  49 #define YYINITDEPTH 200
  50
  51 /* XXX temporary backwards compatibility */
  52 #define PL_lex_brackets         (PL_parser->lex_brackets)
  53 #define PL_lex_brackstack       (PL_parser->lex_brackstack)
  54 #define PL_lex_casemods         (PL_parser->lex_casemods)
  55 #define PL_lex_casestack        (PL_parser->lex_casestack)
  56 #define PL_lex_defer            (PL_parser->lex_defer)
  57 #define PL_lex_dojoin           (PL_parser->lex_dojoin)
  58 #define PL_lex_expect           (PL_parser->lex_expect)
  59 #define PL_lex_formbrack        (PL_parser->lex_formbrack)
  60 #define PL_lex_inpat            (PL_parser->lex_inpat)
  61 #define PL_lex_inwhat           (PL_parser->lex_inwhat)
  62 #define PL_lex_op               (PL_parser->lex_op)
  63 #define PL_lex_repl             (PL_parser->lex_repl)
  64 #define PL_lex_starts           (PL_parser->lex_starts)
  65 #define PL_lex_stuff            (PL_parser->lex_stuff)
  66 #define PL_multi_start          (PL_parser->multi_start)
  67 #define PL_multi_open           (PL_parser->multi_open)
  68 #define PL_multi_close          (PL_parser->multi_close)
  69 #define PL_pending_ident        (PL_parser->pending_ident)
  70 #define PL_preambled            (PL_parser->preambled)
  71 #define PL_sublex_info          (PL_parser->sublex_info)
  72 #define PL_linestr              (PL_parser->linestr)
  73 #define PL_expect               (PL_parser->expect)
  74 #define PL_copline              (PL_parser->copline)
  75 #define PL_bufptr               (PL_parser->bufptr)
  76 #define PL_oldbufptr            (PL_parser->oldbufptr)
  77 #define PL_oldoldbufptr         (PL_parser->oldoldbufptr)
  78 #define PL_linestart            (PL_parser->linestart)
  79 #define PL_bufend               (PL_parser->bufend)
  80 #define PL_last_uni             (PL_parser->last_uni)
  81 #define PL_last_lop             (PL_parser->last_lop)
  82 #define PL_last_lop_op          (PL_parser->last_lop_op)
  83 #define PL_lex_state            (PL_parser->lex_state)
  84 #define PL_rsfp                 (PL_parser->rsfp)
  85 #define PL_rsfp_filters         (PL_parser->rsfp_filters)
  86 #define PL_in_my                (PL_parser->in_my)
  87 #define PL_in_my_stash          (PL_parser->in_my_stash)
  88 #define PL_tokenbuf             (PL_parser->tokenbuf)
  89 #define PL_multi_end            (PL_parser->multi_end)
  90 #define PL_error_count          (PL_parser->error_count)
  91
  92 #ifdef PERL_MAD
  93 #  define PL_endwhite           (PL_parser->endwhite)
  94 #  define PL_faketokens         (PL_parser->faketokens)
  95 #  define PL_lasttoke           (PL_parser->lasttoke)
  96 #  define PL_nextwhite          (PL_parser->nextwhite)
  97 #  define PL_realtokenstart     (PL_parser->realtokenstart)
  98 #  define PL_skipwhite          (PL_parser->skipwhite)
  99 #  define PL_thisclose          (PL_parser->thisclose)
 100 #  define PL_thismad            (PL_parser->thismad)
 101 #  define PL_thisopen           (PL_parser->thisopen)
 102 #  define PL_thisstuff          (PL_parser->thisstuff)
 103 #  define PL_thistoken          (PL_parser->thistoken)
 104 #  define PL_thiswhite          (PL_parser->thiswhite)
 105 #  define PL_thiswhite          (PL_parser->thiswhite)
 106 #  define PL_nexttoke           (PL_parser->nexttoke)
 107 #  define PL_curforce           (PL_parser->curforce)
 108 #else
 109 #  define PL_nexttoke           (PL_parser->nexttoke)
 110 #  define PL_nexttype           (PL_parser->nexttype)
 111 #  define PL_nextval            (PL_parser->nextval)
 112 #endif
 113
 114 /* This can't be done with embed.fnc, because struct yy_parser contains a
 115    member named pending_ident, which clashes with the generated #define  */
 116 static int
 117 S_pending_ident(pTHX);
 118
 119 static const char ident_too_long[] = "Identifier too long";
 120
 121 #ifdef PERL_MAD
 122 #  define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
 123 #  define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
 124 #else
 125 #  define CURMAD(slot,sv)
 126 #  define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
 127 #endif
 128
 129 #define XFAKEBRACK 128
 130 #define XENUMMASK 127
 131
 132 #ifdef USE_UTF8_SCRIPTS
 133 #   define UTF (!IN_BYTES)
 134 #else
 135 #   define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
 136 #endif
 137
 138 /* The maximum number of characters preceding the unrecognized one to display */
 139 #define UNRECOGNIZED_PRECEDE_COUNT 10
 140
 141 /* In variables named $^X, these are the legal values for X.
 142  * 1999-02-27 mjd-perl-patch@plover.com */
 143 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
 144
 145 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
 146
 147 /* LEX_* are values for PL_lex_state, the state of the lexer.
 148  * They are arranged oddly so that the guard on the switch statement
 149  * can get by with a single comparison (if the compiler is smart enough).
 150  */
 151
 152 /* #define LEX_NOTPARSING               11 is done in perl.h. */
 153
 154 #define LEX_NORMAL              10 /* normal code (ie not within "...")     */
 155 #define LEX_INTERPNORMAL         9 /* code within a string, eg "$foo[$x+1]" */
 156 #define LEX_INTERPCASEMOD        8 /* expecting a \U, \Q or \E etc          */
 157 #define LEX_INTERPPUSH           7 /* starting a new sublex parse level     */
 158 #define LEX_INTERPSTART          6 /* expecting the start of a $var         */
 159
 160                                    /* at end of code, eg "$x" followed by:  */
 161 #define LEX_INTERPEND            5 /* ... eg not one of [, { or ->          */
 162 #define LEX_INTERPENDMAYBE       4 /* ... eg one of [, { or ->              */
 163
 164 #define LEX_INTERPCONCAT         3 /* expecting anything, eg at start of
 165                                         string or after \E, $foo, etc       */
 166 #define LEX_INTERPCONST          2 /* NOT USED */
 167 #define LEX_FORMLINE             1 /* expecting a format line               */
 168 #define LEX_KNOWNEXT             0 /* next token known; just return it      */
 169
 170
 171 #ifdef DEBUGGING
 172 static const char* const lex_state_names[] = {
 173     "KNOWNEXT",
 174     "FORMLINE",
 175     "INTERPCONST",
 176     "INTERPCONCAT",
 177     "INTERPENDMAYBE",
 178     "INTERPEND",
 179     "INTERPSTART",
 180     "INTERPPUSH",
 181     "INTERPCASEMOD",
 182     "INTERPNORMAL",
 183     "NORMAL"
 184 };
 185 #endif
 186
 187 #ifdef ff_next
 188 #undef ff_next
 189 #endif
 190
 191 #include "keywords.h"
 192
 193 /* CLINE is a macro that ensures PL_copline has a sane value */
 194
 195 #ifdef CLINE
 196 #undef CLINE
 197 #endif
 198 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
 199
 200 #ifdef PERL_MAD
 201 #  define SKIPSPACE0(s) skipspace0(s)
 202 #  define SKIPSPACE1(s) skipspace1(s)
 203 #  define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
 204 #  define PEEKSPACE(s) skipspace2(s,0)
 205 #else
 206 #  define SKIPSPACE0(s) skipspace(s)
 207 #  define SKIPSPACE1(s) skipspace(s)
 208 #  define SKIPSPACE2(s,tsv) skipspace(s)
 209 #  define PEEKSPACE(s) skipspace(s)
 210 #endif
 211
 212 /*
 213  * Convenience functions to return different tokens and prime the
 214  * lexer for the next token.  They all take an argument.
 215  *
 216  * TOKEN        : generic token (used for '(', DOLSHARP, etc)
 217  * OPERATOR     : generic operator
 218  * AOPERATOR    : assignment operator
 219  * PREBLOCK     : beginning the block after an if, while, foreach, ...
 220  * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
 221  * PREREF       : *EXPR where EXPR is not a simple identifier
 222  * TERM         : expression term
 223  * LOOPX        : loop exiting command (goto, last, dump, etc)
 224  * FTST         : file test operator
 225  * FUN0         : zero-argument function
 226  * FUN1         : not used, except for not, which isn't a UNIOP
 227  * BOop         : bitwise or or xor
 228  * BAop         : bitwise and
 229  * SHop         : shift operator
 230  * PWop         : power operator
 231  * PMop         : pattern-matching operator
 232  * Aop          : addition-level operator
 233  * Mop          : multiplication-level operator
 234  * Eop          : equality-testing operator
 235  * Rop          : relational operator <= != gt
 236  *
 237  * Also see LOP and lop() below.
 238  */
 239
 240 #ifdef DEBUGGING /* Serve -DT. */
 241 #   define REPORT(retval) tokereport((I32)retval, &pl_yylval)
 242 #else
 243 #   define REPORT(retval) (retval)
 244 #endif
 245
 246 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
 247 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
 248 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
 249 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
 250 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
 251 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
 252 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
 253 #define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
 254 #define FTST(f)  return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
 255 #define FUN0(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
 256 #define FUN1(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
 257 #define BOop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
 258 #define BAop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
 259 #define SHop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
 260 #define PWop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
 261 #define PMop(f)  return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
 262 #define Aop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
 263 #define Mop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
 264 #define Eop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
 265 #define Rop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
 266
 267 /* This bit of chicanery makes a unary function followed by
 268  * a parenthesis into a function with one argument, highest precedence.
 269  * The UNIDOR macro is for unary functions that can be followed by the //
 270  * operator (such as C<shift // 0>).
 271  */
 272 #define UNI2(f,x) { \
 273         pl_yylval.ival = f; \
 274         PL_expect = x; \
 275         PL_bufptr = s; \
 276         PL_last_uni = PL_oldbufptr; \
 277         PL_last_lop_op = f; \
 278         if (*s == '(') \
 279             return REPORT( (int)FUNC1 ); \
 280         s = PEEKSPACE(s); \
 281         return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
 282         }
 283 #define UNI(f)    UNI2(f,XTERM)
 284 #define UNIDOR(f) UNI2(f,XTERMORDORDOR)
 285
 286 #define UNIBRACK(f) { \
 287         pl_yylval.ival = f; \
 288         PL_bufptr = s; \
 289         PL_last_uni = PL_oldbufptr; \
 290         if (*s == '(') \
 291             return REPORT( (int)FUNC1 ); \
 292         s = PEEKSPACE(s); \
 293         return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
 294         }
 295
 296 /* grandfather return to old style */
 297 #define OLDLOP(f) return(pl_yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
 298
 299 #ifdef DEBUGGING
 300
 301 /* how to interpret the pl_yylval associated with the token */
 302 enum token_type {
 303     TOKENTYPE_NONE,
 304     TOKENTYPE_IVAL,
 305     TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
 306     TOKENTYPE_PVAL,
 307     TOKENTYPE_OPVAL,
 308     TOKENTYPE_GVVAL
 309 };
 310
 311 static struct debug_tokens {
 312     const int token;
 313     enum token_type type;
 314     const char *name;
 315 } const debug_tokens[] =
 316 {
 317     { ADDOP,            TOKENTYPE_OPNUM,        "ADDOP" },
 318     { ANDAND,           TOKENTYPE_NONE,         "ANDAND" },
 319     { ANDOP,            TOKENTYPE_NONE,         "ANDOP" },
 320     { ANONSUB,          TOKENTYPE_IVAL,         "ANONSUB" },
 321     { ARROW,            TOKENTYPE_NONE,         "ARROW" },
 322     { ASSIGNOP,         TOKENTYPE_OPNUM,        "ASSIGNOP" },
 323     { BITANDOP,         TOKENTYPE_OPNUM,        "BITANDOP" },
 324     { BITOROP,          TOKENTYPE_OPNUM,        "BITOROP" },
 325     { COLONATTR,        TOKENTYPE_NONE,         "COLONATTR" },
 326     { CONTINUE,         TOKENTYPE_NONE,         "CONTINUE" },
 327     { DEFAULT,          TOKENTYPE_NONE,         "DEFAULT" },
 328     { DO,               TOKENTYPE_NONE,         "DO" },
 329     { DOLSHARP,         TOKENTYPE_NONE,         "DOLSHARP" },
 330     { DORDOR,           TOKENTYPE_NONE,         "DORDOR" },
 331     { DOROP,            TOKENTYPE_OPNUM,        "DOROP" },
 332     { DOTDOT,           TOKENTYPE_IVAL,         "DOTDOT" },
 333     { ELSE,             TOKENTYPE_NONE,         "ELSE" },
 334     { ELSIF,            TOKENTYPE_IVAL,         "ELSIF" },
 335     { EQOP,             TOKENTYPE_OPNUM,        "EQOP" },
 336     { FOR,              TOKENTYPE_IVAL,         "FOR" },
 337     { FORMAT,           TOKENTYPE_NONE,         "FORMAT" },
 338     { FUNC,             TOKENTYPE_OPNUM,        "FUNC" },
 339     { FUNC0,            TOKENTYPE_OPNUM,        "FUNC0" },
 340     { FUNC0SUB,         TOKENTYPE_OPVAL,        "FUNC0SUB" },
 341     { FUNC1,            TOKENTYPE_OPNUM,        "FUNC1" },
 342     { FUNCMETH,         TOKENTYPE_OPVAL,        "FUNCMETH" },
 343     { GIVEN,            TOKENTYPE_IVAL,         "GIVEN" },
 344     { HASHBRACK,        TOKENTYPE_NONE,         "HASHBRACK" },
 345     { IF,               TOKENTYPE_IVAL,         "IF" },
 346     { LABEL,            TOKENTYPE_PVAL,         "LABEL" },
 347     { LOCAL,            TOKENTYPE_IVAL,         "LOCAL" },
 348     { LOOPEX,           TOKENTYPE_OPNUM,        "LOOPEX" },
 349     { LSTOP,            TOKENTYPE_OPNUM,        "LSTOP" },
 350     { LSTOPSUB,         TOKENTYPE_OPVAL,        "LSTOPSUB" },
 351     { MATCHOP,          TOKENTYPE_OPNUM,        "MATCHOP" },
 352     { METHOD,           TOKENTYPE_OPVAL,        "METHOD" },
 353     { MULOP,            TOKENTYPE_OPNUM,        "MULOP" },
 354     { MY,               TOKENTYPE_IVAL,         "MY" },
 355     { MYSUB,            TOKENTYPE_NONE,         "MYSUB" },
 356     { NOAMP,            TOKENTYPE_NONE,         "NOAMP" },
 357     { NOTOP,            TOKENTYPE_NONE,         "NOTOP" },
 358     { OROP,             TOKENTYPE_IVAL,         "OROP" },
 359     { OROR,             TOKENTYPE_NONE,         "OROR" },
 360     { PACKAGE,          TOKENTYPE_NONE,         "PACKAGE" },
 361     { PLUGEXPR,         TOKENTYPE_OPVAL,        "PLUGEXPR" },
 362     { PLUGSTMT,         TOKENTYPE_OPVAL,        "PLUGSTMT" },
 363     { PMFUNC,           TOKENTYPE_OPVAL,        "PMFUNC" },
 364     { POSTDEC,          TOKENTYPE_NONE,         "POSTDEC" },
 365     { POSTINC,          TOKENTYPE_NONE,         "POSTINC" },
 366     { POWOP,            TOKENTYPE_OPNUM,        "POWOP" },
 367     { PREDEC,           TOKENTYPE_NONE,         "PREDEC" },
 368     { PREINC,           TOKENTYPE_NONE,         "PREINC" },
 369     { PRIVATEREF,       TOKENTYPE_OPVAL,        "PRIVATEREF" },
 370     { REFGEN,           TOKENTYPE_NONE,         "REFGEN" },
 371     { RELOP,            TOKENTYPE_OPNUM,        "RELOP" },
 372     { SHIFTOP,          TOKENTYPE_OPNUM,        "SHIFTOP" },
 373     { SUB,              TOKENTYPE_NONE,         "SUB" },
 374     { THING,            TOKENTYPE_OPVAL,        "THING" },
 375     { UMINUS,           TOKENTYPE_NONE,         "UMINUS" },
 376     { UNIOP,            TOKENTYPE_OPNUM,        "UNIOP" },
 377     { UNIOPSUB,         TOKENTYPE_OPVAL,        "UNIOPSUB" },
 378     { UNLESS,           TOKENTYPE_IVAL,         "UNLESS" },
 379     { UNTIL,            TOKENTYPE_IVAL,         "UNTIL" },
 380     { USE,              TOKENTYPE_IVAL,         "USE" },
 381     { WHEN,             TOKENTYPE_IVAL,         "WHEN" },
 382     { WHILE,            TOKENTYPE_IVAL,         "WHILE" },
 383     { WORD,             TOKENTYPE_OPVAL,        "WORD" },
 384     { YADAYADA,         TOKENTYPE_IVAL,         "YADAYADA" },
 385     { 0,                TOKENTYPE_NONE,         NULL }
 386 };
 387
 388 /* dump the returned token in rv, plus any optional arg in pl_yylval */
 389
 390 STATIC int
 391 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
 392 {
 393     dVAR;
 394
 395     PERL_ARGS_ASSERT_TOKEREPORT;
 396
 397     if (DEBUG_T_TEST) {
 398         const char *name = NULL;
 399         enum token_type type = TOKENTYPE_NONE;
 400         const struct debug_tokens *p;
 401         SV* const report = newSVpvs("<== ");
 402
 403         for (p = debug_tokens; p->token; p++) {
 404             if (p->token == (int)rv) {
 405                 name = p->name;
 406                 type = p->type;
 407                 break;
 408             }
 409         }
 410         if (name)
 411             Perl_sv_catpv(aTHX_ report, name);
 412         else if ((char)rv > ' ' && (char)rv < '~')
 413             Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
 414         else if (!rv)
 415             sv_catpvs(report, "EOF");
 416         else
 417             Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
 418         switch (type) {
 419         case TOKENTYPE_NONE:
 420         case TOKENTYPE_GVVAL: /* doesn't appear to be used */
 421             break;
 422         case TOKENTYPE_IVAL:
 423             Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
 424             break;
 425         case TOKENTYPE_OPNUM:
 426             Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
 427                                     PL_op_name[lvalp->ival]);
 428             break;
 429         case TOKENTYPE_PVAL:
 430             Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
 431             break;
 432         case TOKENTYPE_OPVAL:
 433             if (lvalp->opval) {
 434                 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
 435                                     PL_op_name[lvalp->opval->op_type]);
 436                 if (lvalp->opval->op_type == OP_CONST) {
 437                     Perl_sv_catpvf(aTHX_ report, " %s",
 438                         SvPEEK(cSVOPx_sv(lvalp->opval)));
 439                 }
 440
 441             }
 442             else
 443                 sv_catpvs(report, "(opval=null)");
 444             break;
 445         }
 446         PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
 447     };
 448     return (int)rv;
 449 }
 450
 451
 452 /* print the buffer with suitable escapes */
 453
 454 STATIC void
 455 S_printbuf(pTHX_ const char *const fmt, const char *const s)
 456 {
 457     SV* const tmp = newSVpvs("");
 458
 459     PERL_ARGS_ASSERT_PRINTBUF;
 460
 461     PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
 462     SvREFCNT_dec(tmp);
 463 }
 464
 465 #endif
 466
 467 static int
 468 S_deprecate_commaless_var_list(pTHX) {
 469     PL_expect = XTERM;
 470     deprecate("comma-less variable list");
 471     return REPORT(','); /* grandfather non-comma-format format */
 472 }
 473
 474 /*
 475  * S_ao
 476  *
 477  * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
 478  * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
 479  */
 480
 481 STATIC int
 482 S_ao(pTHX_ int toketype)
 483 {
 484     dVAR;
 485     if (*PL_bufptr == '=') {
 486         PL_bufptr++;
 487         if (toketype == ANDAND)
 488             pl_yylval.ival = OP_ANDASSIGN;
 489         else if (toketype == OROR)
 490             pl_yylval.ival = OP_ORASSIGN;
 491         else if (toketype == DORDOR)
 492             pl_yylval.ival = OP_DORASSIGN;
 493         toketype = ASSIGNOP;
 494     }
 495     return toketype;
 496 }
 497
 498 /*
 499  * S_no_op
 500  * When Perl expects an operator and finds something else, no_op
 501  * prints the warning.  It always prints "<something> found where
 502  * operator expected.  It prints "Missing semicolon on previous line?"
 503  * if the surprise occurs at the start of the line.  "do you need to
 504  * predeclare ..." is printed out for code like "sub bar; foo bar $x"
 505  * where the compiler doesn't know if foo is a method call or a function.
 506  * It prints "Missing operator before end of line" if there's nothing
 507  * after the missing operator, or "... before <...>" if there is something
 508  * after the missing operator.
 509  */
 510
 511 STATIC void
 512 S_no_op(pTHX_ const char *const what, char *s)
 513 {
 514     dVAR;
 515     char * const oldbp = PL_bufptr;
 516     const bool is_first = (PL_oldbufptr == PL_linestart);
 517
 518     PERL_ARGS_ASSERT_NO_OP;
 519
 520     if (!s)
 521         s = oldbp;
 522     else
 523         PL_bufptr = s;
 524     yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
 525     if (ckWARN_d(WARN_SYNTAX)) {
 526         if (is_first)
 527             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 528                     "\t(Missing semicolon on previous line?)\n");
 529         else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
 530             const char *t;
 531             for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
 532                 NOOP;
 533             if (t < PL_bufptr && isSPACE(*t))
 534                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 535                         "\t(Do you need to predeclare %.*s?)\n",
 536                     (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
 537         }
 538         else {
 539             assert(s >= oldbp);
 540             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 541                     "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
 542         }
 543     }
 544     PL_bufptr = oldbp;
 545 }
 546
 547 /*
 548  * S_missingterm
 549  * Complain about missing quote/regexp/heredoc terminator.
 550  * If it's called with NULL then it cauterizes the line buffer.
 551  * If we're in a delimited string and the delimiter is a control
 552  * character, it's reformatted into a two-char sequence like ^C.
 553  * This is fatal.
 554  */
 555
 556 STATIC void
 557 S_missingterm(pTHX_ char *s)
 558 {
 559     dVAR;
 560     char tmpbuf[3];
 561     char q;
 562     if (s) {
 563         char * const nl = strrchr(s,'\n');
 564         if (nl)
 565             *nl = '\0';
 566     }
 567     else if (isCNTRL(PL_multi_close)) {
 568         *tmpbuf = '^';
 569         tmpbuf[1] = (char)toCTRL(PL_multi_close);
 570         tmpbuf[2] = '\0';
 571         s = tmpbuf;
 572     }
 573     else {
 574         *tmpbuf = (char)PL_multi_close;
 575         tmpbuf[1] = '\0';
 576         s = tmpbuf;
 577     }
 578     q = strchr(s,'"') ? '\'' : '"';
 579     Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
 580 }
 581
 582 #define FEATURE_IS_ENABLED(name)                                        \
 583         ((0 != (PL_hints & HINT_LOCALIZE_HH))                           \
 584             && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
 585 /* The longest string we pass in.  */
 586 #define MAX_FEATURE_LEN (sizeof("switch")-1)
 587
 588 /*
 589  * S_feature_is_enabled
 590  * Check whether the named feature is enabled.
 591  */
 592 STATIC bool
 593 S_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
 594 {
 595     dVAR;
 596     HV * const hinthv = GvHV(PL_hintgv);
 597     char he_name[8 + MAX_FEATURE_LEN] = "feature_";
 598
 599     PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
 600
 601     assert(namelen <= MAX_FEATURE_LEN);
 602     memcpy(&he_name[8], name, namelen);
 603
 604     return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
 605 }
 606
 607 /*
 608  * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
 609  * utf16-to-utf8-reversed.
 610  */
 611
 612 #ifdef PERL_CR_FILTER
 613 static void
 614 strip_return(SV *sv)
 615 {
 616     register const char *s = SvPVX_const(sv);
 617     register const char * const e = s + SvCUR(sv);
 618
 619     PERL_ARGS_ASSERT_STRIP_RETURN;
 620
 621     /* outer loop optimized to do nothing if there are no CR-LFs */
 622     while (s < e) {
 623         if (*s++ == '\r' && *s == '\n') {
 624             /* hit a CR-LF, need to copy the rest */
 625             register char *d = s - 1;
 626             *d++ = *s++;
 627             while (s < e) {
 628                 if (*s == '\r' && s[1] == '\n')
 629                     s++;
 630                 *d++ = *s++;
 631             }
 632             SvCUR(sv) -= s - d;
 633             return;
 634         }
 635     }
 636 }
 637
 638 STATIC I32
 639 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
 640 {
 641     const I32 count = FILTER_READ(idx+1, sv, maxlen);
 642     if (count > 0 && !maxlen)
 643         strip_return(sv);
 644     return count;
 645 }
 646 #endif
 647
 648
 649
 650 /*
 651  * Perl_lex_start
 652  *
 653  * Create a parser object and initialise its parser and lexer fields
 654  *
 655  * rsfp       is the opened file handle to read from (if any),
 656  *
 657  * line       holds any initial content already read from the file (or in
 658  *            the case of no file, such as an eval, the whole contents);
 659  *
 660  * new_filter indicates that this is a new file and it shouldn't inherit
 661  *            the filters from the current parser (ie require).
 662  */
 663
 664 void
 665 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, bool new_filter)
 666 {
 667     dVAR;
 668     const char *s = NULL;
 669     STRLEN len;
 670     yy_parser *parser, *oparser;
 671
 672     /* create and initialise a parser */
 673
 674     Newxz(parser, 1, yy_parser);
 675     parser->old_parser = oparser = PL_parser;
 676     PL_parser = parser;
 677
 678     Newx(parser->stack, YYINITDEPTH, yy_stack_frame);
 679     parser->ps = parser->stack;
 680     parser->stack_size = YYINITDEPTH;
 681
 682     parser->stack->state = 0;
 683     parser->yyerrstatus = 0;
 684     parser->yychar = YYEMPTY;           /* Cause a token to be read.  */
 685
 686     /* on scope exit, free this parser and restore any outer one */
 687     SAVEPARSER(parser);
 688     parser->saved_curcop = PL_curcop;
 689
 690     /* initialise lexer state */
 691
 692 #ifdef PERL_MAD
 693     parser->curforce = -1;
 694 #else
 695     parser->nexttoke = 0;
 696 #endif
 697     parser->error_count = oparser ? oparser->error_count : 0;
 698     parser->copline = NOLINE;
 699     parser->lex_state = LEX_NORMAL;
 700     parser->expect = XSTATE;
 701     parser->rsfp = rsfp;
 702     parser->rsfp_filters = (new_filter || !oparser) ? newAV()
 703                 : MUTABLE_AV(SvREFCNT_inc(oparser->rsfp_filters));
 704
 705     Newx(parser->lex_brackstack, 120, char);
 706     Newx(parser->lex_casestack, 12, char);
 707     *parser->lex_casestack = '\0';
 708
 709     if (line) {
 710         s = SvPV_const(line, len);
 711     } else {
 712         len = 0;
 713     }
 714
 715     if (!len) {
 716         parser->linestr = newSVpvs("\n;");
 717     } else if (SvREADONLY(line) || s[len-1] != ';') {
 718         parser->linestr = newSVsv(line);
 719         if (s[len-1] != ';')
 720             sv_catpvs(parser->linestr, "\n;");
 721     } else {
 722         SvTEMP_off(line);
 723         SvREFCNT_inc_simple_void_NN(line);
 724         parser->linestr = line;
 725     }
 726     parser->oldoldbufptr =
 727         parser->oldbufptr =
 728         parser->bufptr =
 729         parser->linestart = SvPVX(parser->linestr);
 730     parser->bufend = parser->bufptr + SvCUR(parser->linestr);
 731     parser->last_lop = parser->last_uni = NULL;
 732 }
 733
 734
 735 /* delete a parser object */
 736
 737 void
 738 Perl_parser_free(pTHX_  const yy_parser *parser)
 739 {
 740     PERL_ARGS_ASSERT_PARSER_FREE;
 741
 742     PL_curcop = parser->saved_curcop;
 743     SvREFCNT_dec(parser->linestr);
 744
 745     if (parser->rsfp == PerlIO_stdin())
 746         PerlIO_clearerr(parser->rsfp);
 747     else if (parser->rsfp && (!parser->old_parser ||
 748                 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
 749         PerlIO_close(parser->rsfp);
 750     SvREFCNT_dec(parser->rsfp_filters);
 751
 752     Safefree(parser->stack);
 753     Safefree(parser->lex_brackstack);
 754     Safefree(parser->lex_casestack);
 755     PL_parser = parser->old_parser;
 756     Safefree(parser);
 757 }
 758
 759
 760 /*
 761  * Perl_lex_end
 762  * Finalizer for lexing operations.  Must be called when the parser is
 763  * done with the lexer.
 764  */
 765
 766 void
 767 Perl_lex_end(pTHX)
 768 {
 769     dVAR;
 770     PL_doextract = FALSE;
 771 }
 772
 773 /*
 774 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
 775
 776 Buffer scalar containing the chunk currently under consideration of the
 777 text currently being lexed.  This is always a plain string scalar (for
 778 which C<SvPOK> is true).  It is not intended to be used as a scalar by
 779 normal scalar means; instead refer to the buffer directly by the pointer
 780 variables described below.
 781
 782 The lexer maintains various C<char*> pointers to things in the
 783 C<PL_parser-E<gt>linestr> buffer.  If C<PL_parser-E<gt>linestr> is ever
 784 reallocated, all of these pointers must be updated.  Don't attempt to
 785 do this manually, but rather use L</lex_grow_linestr> if you need to
 786 reallocate the buffer.
 787
 788 The content of the text chunk in the buffer is commonly exactly one
 789 complete line of input, up to and including a newline terminator,
 790 but there are situations where it is otherwise.  The octets of the
 791 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
 792 The function L</lex_bufutf8> tells you which.  Do not use the C<SvUTF8>
 793 flag on this scalar, which may disagree with it.
 794
 795 For direct examination of the buffer, the variable
 796 L</PL_parser-E<gt>bufend> points to the end of the buffer.  The current
 797 lexing position is pointed to by L</PL_parser-E<gt>bufptr>.  Direct use
 798 of these pointers is usually preferable to examination of the scalar
 799 through normal scalar means.
 800
 801 =for apidoc AmxU|char *|PL_parser-E<gt>bufend
 802
 803 Direct pointer to the end of the chunk of text currently being lexed, the
 804 end of the lexer buffer.  This is equal to C<SvPVX(PL_parser-E<gt>linestr)
 805 + SvCUR(PL_parser-E<gt>linestr)>.  A NUL character (zero octet) is
 806 always located at the end of the buffer, and does not count as part of
 807 the buffer's contents.
 808
 809 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr
 810
 811 Points to the current position of lexing inside the lexer buffer.
 812 Characters around this point may be freely examined, within
 813 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
 814 L</PL_parser-E<gt>bufend>.  The octets of the buffer may be intended to be
 815 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
 816
 817 Lexing code (whether in the Perl core or not) moves this pointer past
 818 the characters that it consumes.  It is also expected to perform some
 819 bookkeeping whenever a newline character is consumed.  This movement
 820 can be more conveniently performed by the function L</lex_read_to>,
 821 which handles newlines appropriately.
 822
 823 Interpretation of the buffer's octets can be abstracted out by
 824 using the slightly higher-level functions L</lex_peek_unichar> and
 825 L</lex_read_unichar>.
 826
 827 =for apidoc AmxU|char *|PL_parser-E<gt>linestart
 828
 829 Points to the start of the current line inside the lexer buffer.
 830 This is useful for indicating at which column an error occurred, and
 831 not much else.  This must be updated by any lexing code that consumes
 832 a newline; the function L</lex_read_to> handles this detail.
 833
 834 =cut
 835 */
 836
 837 /*
 838 =for apidoc Amx|bool|lex_bufutf8
 839
 840 Indicates whether the octets in the lexer buffer
 841 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
 842 of Unicode characters.  If not, they should be interpreted as Latin-1
 843 characters.  This is analogous to the C<SvUTF8> flag for scalars.
 844
 845 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
 846 contains valid UTF-8.  Lexing code must be robust in the face of invalid
 847 encoding.
 848
 849 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
 850 is significant, but not the whole story regarding the input character
 851 encoding.  Normally, when a file is being read, the scalar contains octets
 852 and its C<SvUTF8> flag is off, but the octets should be interpreted as
 853 UTF-8 if the C<use utf8> pragma is in effect.  During a string eval,
 854 however, the scalar may have the C<SvUTF8> flag on, and in this case its
 855 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
 856 is in effect.  This logic may change in the future; use this function
 857 instead of implementing the logic yourself.
 858
 859 =cut
 860 */
 861
 862 bool
 863 Perl_lex_bufutf8(pTHX)
 864 {
 865     return UTF;
 866 }
 867
 868 /*
 869 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
 870
 871 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
 872 at least I<len> octets (including terminating NUL).  Returns a
 873 pointer to the reallocated buffer.  This is necessary before making
 874 any direct modification of the buffer that would increase its length.
 875 L</lex_stuff_pvn> provides a more convenient way to insert text into
 876 the buffer.
 877
 878 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
 879 this function updates all of the lexer's variables that point directly
 880 into the buffer.
 881
 882 =cut
 883 */
 884
 885 char *
 886 Perl_lex_grow_linestr(pTHX_ STRLEN len)
 887 {
 888     SV *linestr;
 889     char *buf;
 890     STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
 891     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
 892     linestr = PL_parser->linestr;
 893     buf = SvPVX(linestr);
 894     if (len <= SvLEN(linestr))
 895         return buf;
 896     bufend_pos = PL_parser->bufend - buf;
 897     bufptr_pos = PL_parser->bufptr - buf;
 898     oldbufptr_pos = PL_parser->oldbufptr - buf;
 899     oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
 900     linestart_pos = PL_parser->linestart - buf;
 901     last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
 902     last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
 903     buf = sv_grow(linestr, len);
 904     PL_parser->bufend = buf + bufend_pos;
 905     PL_parser->bufptr = buf + bufptr_pos;
 906     PL_parser->oldbufptr = buf + oldbufptr_pos;
 907     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
 908     PL_parser->linestart = buf + linestart_pos;
 909     if (PL_parser->last_uni)
 910         PL_parser->last_uni = buf + last_uni_pos;
 911     if (PL_parser->last_lop)
 912         PL_parser->last_lop = buf + last_lop_pos;
 913     return buf;
 914 }
 915
 916 /*
 917 =for apidoc Amx|void|lex_stuff_pvn|char *pv|STRLEN len|U32 flags
 918
 919 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
 920 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
 921 reallocating the buffer if necessary.  This means that lexing code that
 922 runs later will see the characters as if they had appeared in the input.
 923 It is not recommended to do this as part of normal parsing, and most
 924 uses of this facility run the risk of the inserted characters being
 925 interpreted in an unintended manner.
 926
 927 The string to be inserted is represented by I<len> octets starting
 928 at I<pv>.  These octets are interpreted as either UTF-8 or Latin-1,
 929 according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
 930 The characters are recoded for the lexer buffer, according to how the
 931 buffer is currently being interpreted (L</lex_bufutf8>).  If a string
 932 to be interpreted is available as a Perl scalar, the L</lex_stuff_sv>
 933 function is more convenient.
 934
 935 =cut
 936 */
 937
 938 void
 939 Perl_lex_stuff_pvn(pTHX_ char *pv, STRLEN len, U32 flags)
 940 {
 941     char *bufptr;
 942     PERL_ARGS_ASSERT_LEX_STUFF_PVN;
 943     if (flags & ~(LEX_STUFF_UTF8))
 944         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
 945     if (UTF) {
 946         if (flags & LEX_STUFF_UTF8) {
 947             goto plain_copy;
 948         } else {
 949             STRLEN highhalf = 0;
 950             char *p, *e = pv+len;
 951             for (p = pv; p != e; p++)
 952                 highhalf += !!(((U8)*p) & 0x80);
 953             if (!highhalf)
 954                 goto plain_copy;
 955             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
 956             bufptr = PL_parser->bufptr;
 957             Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
 958             PL_parser->bufend += len+highhalf;
 959             for (p = pv; p != e; p++) {
 960                 U8 c = (U8)*p;
 961                 if (c & 0x80) {
 962                     *bufptr++ = (char)(0xc0 | (c >> 6));
 963                     *bufptr++ = (char)(0x80 | (c & 0x3f));
 964                 } else {
 965                     *bufptr++ = (char)c;
 966                 }
 967             }
 968         }
 969     } else {
 970         if (flags & LEX_STUFF_UTF8) {
 971             STRLEN highhalf = 0;
 972             char *p, *e = pv+len;
 973             for (p = pv; p != e; p++) {
 974                 U8 c = (U8)*p;
 975                 if (c >= 0xc4) {
 976                     Perl_croak(aTHX_ "Lexing code attempted to stuff "
 977                                 "non-Latin-1 character into Latin-1 input");
 978                 } else if (c >= 0xc2 && p+1 != e &&
 979                             (((U8)p[1]) & 0xc0) == 0x80) {
 980                     p++;
 981                     highhalf++;
 982                 } else if (c >= 0x80) {
 983                     /* malformed UTF-8 */
 984                     ENTER;
 985                     SAVESPTR(PL_warnhook);
 986                     PL_warnhook = PERL_WARNHOOK_FATAL;
 987                     utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
 988                     LEAVE;
 989                 }
 990             }
 991             if (!highhalf)
 992                 goto plain_copy;
 993             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
 994             bufptr = PL_parser->bufptr;
 995             Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
 996             PL_parser->bufend += len-highhalf;
 997             for (p = pv; p != e; p++) {
 998                 U8 c = (U8)*p;
 999                 if (c & 0x80) {
1000                     *bufptr++ = (char)(((c & 0x3) << 6) | (p[1] & 0x3f));
1001                     p++;
1002                 } else {
1003                     *bufptr++ = (char)c;
1004                 }
1005             }
1006         } else {
1007             plain_copy:
1008             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1009             bufptr = PL_parser->bufptr;
1010             Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1011             PL_parser->bufend += len;
1012             Copy(pv, bufptr, len, char);
1013         }
1014     }
1015 }
1016
1017 /*
1018 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1019
1020 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1021 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1022 reallocating the buffer if necessary.  This means that lexing code that
1023 runs later will see the characters as if they had appeared in the input.
1024 It is not recommended to do this as part of normal parsing, and most
1025 uses of this facility run the risk of the inserted characters being
1026 interpreted in an unintended manner.
1027
1028 The string to be inserted is the string value of I<sv>.  The characters
1029 are recoded for the lexer buffer, according to how the buffer is currently
1030 being interpreted (L</lex_bufutf8>).  If a string to be interpreted is
1031 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1032 need to construct a scalar.
1033
1034 =cut
1035 */
1036
1037 void
1038 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1039 {
1040     char *pv;
1041     STRLEN len;
1042     PERL_ARGS_ASSERT_LEX_STUFF_SV;
1043     if (flags)
1044         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1045     pv = SvPV(sv, len);
1046     lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1047 }
1048
1049 /*
1050 =for apidoc Amx|void|lex_unstuff|char *ptr
1051
1052 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1053 I<ptr>.  Text following I<ptr> will be moved, and the buffer shortened.
1054 This hides the discarded text from any lexing code that runs later,
1055 as if the text had never appeared.
1056
1057 This is not the normal way to consume lexed text.  For that, use
1058 L</lex_read_to>.
1059
1060 =cut
1061 */
1062
1063 void
1064 Perl_lex_unstuff(pTHX_ char *ptr)
1065 {
1066     char *buf, *bufend;
1067     STRLEN unstuff_len;
1068     PERL_ARGS_ASSERT_LEX_UNSTUFF;
1069     buf = PL_parser->bufptr;
1070     if (ptr < buf)
1071         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1072     if (ptr == buf)
1073         return;
1074     bufend = PL_parser->bufend;
1075     if (ptr > bufend)
1076         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1077     unstuff_len = ptr - buf;
1078     Move(ptr, buf, bufend+1-ptr, char);
1079     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1080     PL_parser->bufend = bufend - unstuff_len;
1081 }
1082
1083 /*
1084 =for apidoc Amx|void|lex_read_to|char *ptr
1085
1086 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1087 to I<ptr>.  This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1088 performing the correct bookkeeping whenever a newline character is passed.
1089 This is the normal way to consume lexed text.
1090
1091 Interpretation of the buffer's octets can be abstracted out by
1092 using the slightly higher-level functions L</lex_peek_unichar> and
1093 L</lex_read_unichar>.
1094
1095 =cut
1096 */
1097
1098 void
1099 Perl_lex_read_to(pTHX_ char *ptr)
1100 {
1101     char *s;
1102     PERL_ARGS_ASSERT_LEX_READ_TO;
1103     s = PL_parser->bufptr;
1104     if (ptr < s || ptr > PL_parser->bufend)
1105         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1106     for (; s != ptr; s++)
1107         if (*s == '\n') {
1108             CopLINE_inc(PL_curcop);
1109             PL_parser->linestart = s+1;
1110         }
1111     PL_parser->bufptr = ptr;
1112 }
1113
1114 /*
1115 =for apidoc Amx|void|lex_discard_to|char *ptr
1116
1117 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1118 up to I<ptr>.  The remaining content of the buffer will be moved, and
1119 all pointers into the buffer updated appropriately.  I<ptr> must not
1120 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1121 it is not permitted to discard text that has yet to be lexed.
1122
1123 Normally it is not necessarily to do this directly, because it suffices to
1124 use the implicit discarding behaviour of L</lex_next_chunk> and things
1125 based on it.  However, if a token stretches across multiple lines,
1126 and the lexing code has kept multiple lines of text in the buffer fof
1127 that purpose, then after completion of the token it would be wise to
1128 explicitly discard the now-unneeded earlier lines, to avoid future
1129 multi-line tokens growing the buffer without bound.
1130
1131 =cut
1132 */
1133
1134 void
1135 Perl_lex_discard_to(pTHX_ char *ptr)
1136 {
1137     char *buf;
1138     STRLEN discard_len;
1139     PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1140     buf = SvPVX(PL_parser->linestr);
1141     if (ptr < buf)
1142         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1143     if (ptr == buf)
1144         return;
1145     if (ptr > PL_parser->bufptr)
1146         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1147     discard_len = ptr - buf;
1148     if (PL_parser->oldbufptr < ptr)
1149         PL_parser->oldbufptr = ptr;
1150     if (PL_parser->oldoldbufptr < ptr)
1151         PL_parser->oldoldbufptr = ptr;
1152     if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1153         PL_parser->last_uni = NULL;
1154     if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1155         PL_parser->last_lop = NULL;
1156     Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1157     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1158     PL_parser->bufend -= discard_len;
1159     PL_parser->bufptr -= discard_len;
1160     PL_parser->oldbufptr -= discard_len;
1161     PL_parser->oldoldbufptr -= discard_len;
1162     if (PL_parser->last_uni)
1163         PL_parser->last_uni -= discard_len;
1164     if (PL_parser->last_lop)
1165         PL_parser->last_lop -= discard_len;
1166 }
1167
1168 /*
1169 =for apidoc Amx|bool|lex_next_chunk|U32 flags
1170
1171 Reads in the next chunk of text to be lexed, appending it to
1172 L</PL_parser-E<gt>linestr>.  This should be called when lexing code has
1173 looked to the end of the current chunk and wants to know more.  It is
1174 usual, but not necessary, for lexing to have consumed the entirety of
1175 the current chunk at this time.
1176
1177 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1178 chunk (i.e., the current chunk has been entirely consumed), normally the
1179 current chunk will be discarded at the same time that the new chunk is
1180 read in.  If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1181 will not be discarded.  If the current chunk has not been entirely
1182 consumed, then it will not be discarded regardless of the flag.
1183
1184 Returns true if some new text was added to the buffer, or false if the
1185 buffer has reached the end of the input text.
1186
1187 =cut
1188 */
1189
1190 #define LEX_FAKE_EOF 0x80000000
1191
1192 bool
1193 Perl_lex_next_chunk(pTHX_ U32 flags)
1194 {
1195     SV *linestr;
1196     char *buf;
1197     STRLEN old_bufend_pos, new_bufend_pos;
1198     STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1199     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1200     bool got_some;
1201     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF))
1202         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1203 #ifdef PERL_MAD
1204     flags |= LEX_KEEP_PREVIOUS;
1205 #endif /* PERL_MAD */
1206     linestr = PL_parser->linestr;
1207     buf = SvPVX(linestr);
1208     if (!(flags & LEX_KEEP_PREVIOUS) &&
1209             PL_parser->bufptr == PL_parser->bufend) {
1210         old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1211         linestart_pos = 0;
1212         if (PL_parser->last_uni != PL_parser->bufend)
1213             PL_parser->last_uni = NULL;
1214         if (PL_parser->last_lop != PL_parser->bufend)
1215             PL_parser->last_lop = NULL;
1216         last_uni_pos = last_lop_pos = 0;
1217         *buf = 0;
1218         SvCUR(linestr) = 0;
1219     } else {
1220         old_bufend_pos = PL_parser->bufend - buf;
1221         bufptr_pos = PL_parser->bufptr - buf;
1222         oldbufptr_pos = PL_parser->oldbufptr - buf;
1223         oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1224         linestart_pos = PL_parser->linestart - buf;
1225         last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1226         last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1227     }
1228     if (flags & LEX_FAKE_EOF) {
1229         goto eof;
1230     } else if (!PL_parser->rsfp) {
1231         got_some = 0;
1232     } else if (filter_gets(linestr, old_bufend_pos)) {
1233         got_some = 1;
1234     } else {
1235         eof:
1236         /* End of real input.  Close filehandle (unless it was STDIN),
1237          * then add implicit termination.
1238          */
1239         if ((PerlIO*)PL_parser->rsfp == PerlIO_stdin())
1240             PerlIO_clearerr(PL_parser->rsfp);
1241         else if (PL_parser->rsfp)
1242             (void)PerlIO_close(PL_parser->rsfp);
1243         PL_parser->rsfp = NULL;
1244         PL_doextract = FALSE;
1245 #ifdef PERL_MAD
1246         if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1247             PL_faketokens = 1;
1248 #endif
1249         if (!PL_in_eval && PL_minus_p) {
1250             sv_catpvs(linestr,
1251                 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1252             PL_minus_n = PL_minus_p = 0;
1253         } else if (!PL_in_eval && PL_minus_n) {
1254             sv_catpvs(linestr, /*{*/";}");
1255             PL_minus_n = 0;
1256         } else
1257             sv_catpvs(linestr, ";");
1258         got_some = 1;
1259     }
1260     buf = SvPVX(linestr);
1261     new_bufend_pos = SvCUR(linestr);
1262     PL_parser->bufend = buf + new_bufend_pos;
1263     PL_parser->bufptr = buf + bufptr_pos;
1264     PL_parser->oldbufptr = buf + oldbufptr_pos;
1265     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1266     PL_parser->linestart = buf + linestart_pos;
1267     if (PL_parser->last_uni)
1268         PL_parser->last_uni = buf + last_uni_pos;
1269     if (PL_parser->last_lop)
1270         PL_parser->last_lop = buf + last_lop_pos;
1271     if (got_some && (PERLDB_LINE || PERLDB_SAVESRC) &&
1272             PL_curstash != PL_debstash) {
1273         /* debugger active and we're not compiling the debugger code,
1274          * so store the line into the debugger's array of lines
1275          */
1276         update_debugger_info(NULL, buf+old_bufend_pos,
1277             new_bufend_pos-old_bufend_pos);
1278     }
1279     return got_some;
1280 }
1281
1282 /*
1283 =for apidoc Amx|I32|lex_peek_unichar|U32 flags
1284
1285 Looks ahead one (Unicode) character in the text currently being lexed.
1286 Returns the codepoint (unsigned integer value) of the next character,
1287 or -1 if lexing has reached the end of the input text.  To consume the
1288 peeked character, use L</lex_read_unichar>.
1289
1290 If the next character is in (or extends into) the next chunk of input
1291 text, the next chunk will be read in.  Normally the current chunk will be
1292 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1293 then the current chunk will not be discarded.
1294
1295 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1296 is encountered, an exception is generated.
1297
1298 =cut
1299 */
1300
1301 I32
1302 Perl_lex_peek_unichar(pTHX_ U32 flags)
1303 {
1304     char *s, *bufend;
1305     if (flags & ~(LEX_KEEP_PREVIOUS))
1306         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1307     s = PL_parser->bufptr;
1308     bufend = PL_parser->bufend;
1309     if (UTF) {
1310         U8 head;
1311         I32 unichar;
1312         STRLEN len, retlen;
1313         if (s == bufend) {
1314             if (!lex_next_chunk(flags))
1315                 return -1;
1316             s = PL_parser->bufptr;
1317             bufend = PL_parser->bufend;
1318         }
1319         head = (U8)*s;
1320         if (!(head & 0x80))
1321             return head;
1322         if (head & 0x40) {
1323             len = PL_utf8skip[head];
1324             while ((STRLEN)(bufend-s) < len) {
1325                 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1326                     break;
1327                 s = PL_parser->bufptr;
1328                 bufend = PL_parser->bufend;
1329             }
1330         }
1331         unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1332         if (retlen == (STRLEN)-1) {
1333             /* malformed UTF-8 */
1334             ENTER;
1335             SAVESPTR(PL_warnhook);
1336             PL_warnhook = PERL_WARNHOOK_FATAL;
1337             utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1338             LEAVE;
1339         }
1340         return unichar;
1341     } else {
1342         if (s == bufend) {
1343             if (!lex_next_chunk(flags))
1344                 return -1;
1345             s = PL_parser->bufptr;
1346         }
1347         return (U8)*s;
1348     }
1349 }
1350
1351 /*
1352 =for apidoc Amx|I32|lex_read_unichar|U32 flags
1353
1354 Reads the next (Unicode) character in the text currently being lexed.
1355 Returns the codepoint (unsigned integer value) of the character read,
1356 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1357 if lexing has reached the end of the input text.  To non-destructively
1358 examine the next character, use L</lex_peek_unichar> instead.
1359
1360 If the next character is in (or extends into) the next chunk of input
1361 text, the next chunk will be read in.  Normally the current chunk will be
1362 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1363 then the current chunk will not be discarded.
1364
1365 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1366 is encountered, an exception is generated.
1367
1368 =cut
1369 */
1370
1371 I32
1372 Perl_lex_read_unichar(pTHX_ U32 flags)
1373 {
1374     I32 c;
1375     if (flags & ~(LEX_KEEP_PREVIOUS))
1376         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1377     c = lex_peek_unichar(flags);
1378     if (c != -1) {
1379         if (c == '\n')
1380             CopLINE_inc(PL_curcop);
1381         PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1382     }
1383     return c;
1384 }
1385
1386 /*
1387 =for apidoc Amx|void|lex_read_space|U32 flags
1388
1389 Reads optional spaces, in Perl style, in the text currently being
1390 lexed.  The spaces may include ordinary whitespace characters and
1391 Perl-style comments.  C<#line> directives are processed if encountered.
1392 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1393 at a non-space character (or the end of the input text).
1394
1395 If spaces extend into the next chunk of input text, the next chunk will
1396 be read in.  Normally the current chunk will be discarded at the same
1397 time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1398 chunk will not be discarded.
1399
1400 =cut
1401 */
1402
1403 void
1404 Perl_lex_read_space(pTHX_ U32 flags)
1405 {
1406     char *s, *bufend;
1407     bool need_incline = 0;
1408     if (flags & ~(LEX_KEEP_PREVIOUS))
1409         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1410 #ifdef PERL_MAD
1411     if (PL_skipwhite) {
1412         sv_free(PL_skipwhite);
1413         PL_skipwhite = NULL;
1414     }
1415     if (PL_madskills)
1416         PL_skipwhite = newSVpvs("");
1417 #endif /* PERL_MAD */
1418     s = PL_parser->bufptr;
1419     bufend = PL_parser->bufend;
1420     while (1) {
1421         char c = *s;
1422         if (c == '#') {
1423             do {
1424                 c = *++s;
1425             } while (!(c == '\n' || (c == 0 && s == bufend)));
1426         } else if (c == '\n') {
1427             s++;
1428             PL_parser->linestart = s;
1429             if (s == bufend)
1430                 need_incline = 1;
1431             else
1432                 incline(s);
1433         } else if (isSPACE(c)) {
1434             s++;
1435         } else if (c == 0 && s == bufend) {
1436             bool got_more;
1437 #ifdef PERL_MAD
1438             if (PL_madskills)
1439                 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1440 #endif /* PERL_MAD */
1441             PL_parser->bufptr = s;
1442             CopLINE_inc(PL_curcop);
1443             got_more = lex_next_chunk(flags);
1444             CopLINE_dec(PL_curcop);
1445             s = PL_parser->bufptr;
1446             bufend = PL_parser->bufend;
1447             if (!got_more)
1448                 break;
1449             if (need_incline && PL_parser->rsfp) {
1450                 incline(s);
1451                 need_incline = 0;
1452             }
1453         } else {
1454             break;
1455         }
1456     }
1457 #ifdef PERL_MAD
1458     if (PL_madskills)
1459         sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1460 #endif /* PERL_MAD */
1461     PL_parser->bufptr = s;
1462 }
1463
1464 /*
1465  * S_incline
1466  * This subroutine has nothing to do with tilting, whether at windmills
1467  * or pinball tables.  Its name is short for "increment line".  It
1468  * increments the current line number in CopLINE(PL_curcop) and checks
1469  * to see whether the line starts with a comment of the form
1470  *    # line 500 "foo.pm"
1471  * If so, it sets the current line number and file to the values in the comment.
1472  */
1473
1474 STATIC void
1475 S_incline(pTHX_ const char *s)
1476 {
1477     dVAR;
1478     const char *t;
1479     const char *n;
1480     const char *e;
1481
1482     PERL_ARGS_ASSERT_INCLINE;
1483
1484     CopLINE_inc(PL_curcop);
1485     if (*s++ != '#')
1486         return;
1487     while (SPACE_OR_TAB(*s))
1488         s++;
1489     if (strnEQ(s, "line", 4))
1490         s += 4;
1491     else
1492         return;
1493     if (SPACE_OR_TAB(*s))
1494         s++;
1495     else
1496         return;
1497     while (SPACE_OR_TAB(*s))
1498         s++;
1499     if (!isDIGIT(*s))
1500         return;
1501
1502     n = s;
1503     while (isDIGIT(*s))
1504         s++;
1505     if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1506         return;
1507     while (SPACE_OR_TAB(*s))
1508         s++;
1509     if (*s == '"' && (t = strchr(s+1, '"'))) {
1510         s++;
1511         e = t + 1;
1512     }
1513     else {
1514         t = s;
1515         while (!isSPACE(*t))
1516             t++;
1517         e = t;
1518     }
1519     while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1520         e++;
1521     if (*e != '\n' && *e != '\0')
1522         return;         /* false alarm */
1523
1524     if (t - s > 0) {
1525         const STRLEN len = t - s;
1526 #ifndef USE_ITHREADS
1527         SV *const temp_sv = CopFILESV(PL_curcop);
1528         const char *cf;
1529         STRLEN tmplen;
1530
1531         if (temp_sv) {
1532             cf = SvPVX(temp_sv);
1533             tmplen = SvCUR(temp_sv);
1534         } else {
1535             cf = NULL;
1536             tmplen = 0;
1537         }
1538
1539         if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
1540             /* must copy *{"::_<(eval N)[oldfilename:L]"}
1541              * to *{"::_<newfilename"} */
1542             /* However, the long form of evals is only turned on by the
1543                debugger - usually they're "(eval %lu)" */
1544             char smallbuf[128];
1545             char *tmpbuf;
1546             GV **gvp;
1547             STRLEN tmplen2 = len;
1548             if (tmplen + 2 <= sizeof smallbuf)
1549                 tmpbuf = smallbuf;
1550             else
1551                 Newx(tmpbuf, tmplen + 2, char);
1552             tmpbuf[0] = '_';
1553             tmpbuf[1] = '<';
1554             memcpy(tmpbuf + 2, cf, tmplen);
1555             tmplen += 2;
1556             gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
1557             if (gvp) {
1558                 char *tmpbuf2;
1559                 GV *gv2;
1560
1561                 if (tmplen2 + 2 <= sizeof smallbuf)
1562                     tmpbuf2 = smallbuf;
1563                 else
1564                     Newx(tmpbuf2, tmplen2 + 2, char);
1565
1566                 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
1567                     /* Either they malloc'd it, or we malloc'd it,
1568                        so no prefix is present in ours.  */
1569                     tmpbuf2[0] = '_';
1570                     tmpbuf2[1] = '<';
1571                 }
1572
1573                 memcpy(tmpbuf2 + 2, s, tmplen2);
1574                 tmplen2 += 2;
1575
1576                 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1577                 if (!isGV(gv2)) {
1578                     gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1579                     /* adjust ${"::_<newfilename"} to store the new file name */
1580                     GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1581                     GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
1582                     GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
1583                 }
1584
1585                 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1586             }
1587             if (tmpbuf != smallbuf) Safefree(tmpbuf);
1588         }
1589 #endif
1590         CopFILE_free(PL_curcop);
1591         CopFILE_setn(PL_curcop, s, len);
1592     }
1593     CopLINE_set(PL_curcop, atoi(n)-1);
1594 }
1595
1596 #ifdef PERL_MAD
1597 /* skip space before PL_thistoken */
1598
1599 STATIC char *
1600 S_skipspace0(pTHX_ register char *s)
1601 {
1602     PERL_ARGS_ASSERT_SKIPSPACE0;
1603
1604     s = skipspace(s);
1605     if (!PL_madskills)
1606         return s;
1607     if (PL_skipwhite) {
1608         if (!PL_thiswhite)
1609             PL_thiswhite = newSVpvs("");
1610         sv_catsv(PL_thiswhite, PL_skipwhite);
1611         sv_free(PL_skipwhite);
1612         PL_skipwhite = 0;
1613     }
1614     PL_realtokenstart = s - SvPVX(PL_linestr);
1615     return s;
1616 }
1617
1618 /* skip space after PL_thistoken */
1619
1620 STATIC char *
1621 S_skipspace1(pTHX_ register char *s)
1622 {
1623     const char *start = s;
1624     I32 startoff = start - SvPVX(PL_linestr);
1625
1626     PERL_ARGS_ASSERT_SKIPSPACE1;
1627
1628     s = skipspace(s);
1629     if (!PL_madskills)
1630         return s;
1631     start = SvPVX(PL_linestr) + startoff;
1632     if (!PL_thistoken && PL_realtokenstart >= 0) {
1633         const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1634         PL_thistoken = newSVpvn(tstart, start - tstart);
1635     }
1636     PL_realtokenstart = -1;
1637     if (PL_skipwhite) {
1638         if (!PL_nextwhite)
1639             PL_nextwhite = newSVpvs("");
1640         sv_catsv(PL_nextwhite, PL_skipwhite);
1641         sv_free(PL_skipwhite);
1642         PL_skipwhite = 0;
1643     }
1644     return s;
1645 }
1646
1647 STATIC char *
1648 S_skipspace2(pTHX_ register char *s, SV **svp)
1649 {
1650     char *start;
1651     const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
1652     const I32 startoff = s - SvPVX(PL_linestr);
1653
1654     PERL_ARGS_ASSERT_SKIPSPACE2;
1655
1656     s = skipspace(s);
1657     PL_bufptr = SvPVX(PL_linestr) + bufptroff;
1658     if (!PL_madskills || !svp)
1659         return s;
1660     start = SvPVX(PL_linestr) + startoff;
1661     if (!PL_thistoken && PL_realtokenstart >= 0) {
1662         char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1663         PL_thistoken = newSVpvn(tstart, start - tstart);
1664         PL_realtokenstart = -1;
1665     }
1666     if (PL_skipwhite) {
1667         if (!*svp)
1668             *svp = newSVpvs("");
1669         sv_setsv(*svp, PL_skipwhite);
1670         sv_free(PL_skipwhite);
1671         PL_skipwhite = 0;
1672     }
1673
1674     return s;
1675 }
1676 #endif
1677
1678 STATIC void
1679 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1680 {
1681     AV *av = CopFILEAVx(PL_curcop);
1682     if (av) {
1683         SV * const sv = newSV_type(SVt_PVMG);
1684         if (orig_sv)
1685             sv_setsv(sv, orig_sv);
1686         else
1687             sv_setpvn(sv, buf, len);
1688         (void)SvIOK_on(sv);
1689         SvIV_set(sv, 0);
1690         av_store(av, (I32)CopLINE(PL_curcop), sv);
1691     }
1692 }
1693
1694 /*
1695  * S_skipspace
1696  * Called to gobble the appropriate amount and type of whitespace.
1697  * Skips comments as well.
1698  */
1699
1700 STATIC char *
1701 S_skipspace(pTHX_ register char *s)
1702 {
1703 #ifdef PERL_MAD
1704     char *start = s;
1705 #endif /* PERL_MAD */
1706     PERL_ARGS_ASSERT_SKIPSPACE;
1707 #ifdef PERL_MAD
1708     if (PL_skipwhite) {
1709         sv_free(PL_skipwhite);
1710         PL_skipwhite = NULL;
1711     }
1712 #endif /* PERL_MAD */
1713     if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1714         while (s < PL_bufend && SPACE_OR_TAB(*s))
1715             s++;
1716     } else if (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE) {
1717         while (isSPACE(*s) && *s != '\n')
1718             s++;
1719         if (*s == '#') {
1720             do {
1721                 s++;
1722             } while (s != PL_bufend && *s != '\n');
1723         }
1724         if (*s == '\n')
1725             s++;
1726     } else {
1727         STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1728         PL_bufptr = s;
1729         lex_read_space(LEX_KEEP_PREVIOUS);
1730         s = PL_bufptr;
1731         PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1732         if (PL_linestart > PL_bufptr)
1733             PL_bufptr = PL_linestart;
1734         return s;
1735     }
1736 #ifdef PERL_MAD
1737     if (PL_madskills)
1738         PL_skipwhite = newSVpvn(start, s-start);
1739 #endif /* PERL_MAD */
1740     return s;
1741 }
1742
1743 /*
1744  * S_check_uni
1745  * Check the unary operators to ensure there's no ambiguity in how they're
1746  * used.  An ambiguous piece of code would be:
1747  *     rand + 5
1748  * This doesn't mean rand() + 5.  Because rand() is a unary operator,
1749  * the +5 is its argument.
1750  */
1751
1752 STATIC void
1753 S_check_uni(pTHX)
1754 {
1755     dVAR;
1756     const char *s;
1757     const char *t;
1758
1759     if (PL_oldoldbufptr != PL_last_uni)
1760         return;
1761     while (isSPACE(*PL_last_uni))
1762         PL_last_uni++;
1763     s = PL_last_uni;
1764     while (isALNUM_lazy_if(s,UTF) || *s == '-')
1765         s++;
1766     if ((t = strchr(s, '(')) && t < PL_bufptr)
1767         return;
1768
1769     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1770                      "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1771                      (int)(s - PL_last_uni), PL_last_uni);
1772 }
1773
1774 /*
1775  * LOP : macro to build a list operator.  Its behaviour has been replaced
1776  * with a subroutine, S_lop() for which LOP is just another name.
1777  */
1778
1779 #define LOP(f,x) return lop(f,x,s)
1780
1781 /*
1782  * S_lop
1783  * Build a list operator (or something that might be one).  The rules:
1784  *  - if we have a next token, then it's a list operator [why?]
1785  *  - if the next thing is an opening paren, then it's a function
1786  *  - else it's a list operator
1787  */
1788
1789 STATIC I32
1790 S_lop(pTHX_ I32 f, int x, char *s)
1791 {
1792     dVAR;
1793
1794     PERL_ARGS_ASSERT_LOP;
1795
1796     pl_yylval.ival = f;
1797     CLINE;
1798     PL_expect = x;
1799     PL_bufptr = s;
1800     PL_last_lop = PL_oldbufptr;
1801     PL_last_lop_op = (OPCODE)f;
1802 #ifdef PERL_MAD
1803     if (PL_lasttoke)
1804         return REPORT(LSTOP);
1805 #else
1806     if (PL_nexttoke)
1807         return REPORT(LSTOP);
1808 #endif
1809     if (*s == '(')
1810         return REPORT(FUNC);
1811     s = PEEKSPACE(s);
1812     if (*s == '(')
1813         return REPORT(FUNC);
1814     else
1815         return REPORT(LSTOP);
1816 }
1817
1818 #ifdef PERL_MAD
1819  /*
1820  * S_start_force
1821  * Sets up for an eventual force_next().  start_force(0) basically does
1822  * an unshift, while start_force(-1) does a push.  yylex removes items
1823  * on the "pop" end.
1824  */
1825
1826 STATIC void
1827 S_start_force(pTHX_ int where)
1828 {
1829     int i;
1830
1831     if (where < 0)      /* so people can duplicate start_force(PL_curforce) */
1832         where = PL_lasttoke;
1833     assert(PL_curforce < 0 || PL_curforce == where);
1834     if (PL_curforce != where) {
1835         for (i = PL_lasttoke; i > where; --i) {
1836             PL_nexttoke[i] = PL_nexttoke[i-1];
1837         }
1838         PL_lasttoke++;
1839     }
1840     if (PL_curforce < 0)        /* in case of duplicate start_force() */
1841         Zero(&PL_nexttoke[where], 1, NEXTTOKE);
1842     PL_curforce = where;
1843     if (PL_nextwhite) {
1844         if (PL_madskills)
1845             curmad('^', newSVpvs(""));
1846         CURMAD('_', PL_nextwhite);
1847     }
1848 }
1849
1850 STATIC void
1851 S_curmad(pTHX_ char slot, SV *sv)
1852 {
1853     MADPROP **where;
1854
1855     if (!sv)
1856         return;
1857     if (PL_curforce < 0)
1858         where = &PL_thismad;
1859     else
1860         where = &PL_nexttoke[PL_curforce].next_mad;
1861
1862     if (PL_faketokens)
1863         sv_setpvs(sv, "");
1864     else {
1865         if (!IN_BYTES) {
1866             if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1867                 SvUTF8_on(sv);
1868             else if (PL_encoding) {
1869                 sv_recode_to_utf8(sv, PL_encoding);
1870             }
1871         }
1872     }
1873
1874     /* keep a slot open for the head of the list? */
1875     if (slot != '_' && *where && (*where)->mad_key == '^') {
1876         (*where)->mad_key = slot;
1877         sv_free(MUTABLE_SV(((*where)->mad_val)));
1878         (*where)->mad_val = (void*)sv;
1879     }
1880     else
1881         addmad(newMADsv(slot, sv), where, 0);
1882 }
1883 #else
1884 #  define start_force(where)    NOOP
1885 #  define curmad(slot, sv)      NOOP
1886 #endif
1887
1888 /*
1889  * S_force_next
1890  * When the lexer realizes it knows the next token (for instance,
1891  * it is reordering tokens for the parser) then it can call S_force_next
1892  * to know what token to return the next time the lexer is called.  Caller
1893  * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1894  * and possibly PL_expect to ensure the lexer handles the token correctly.
1895  */
1896
1897 STATIC void
1898 S_force_next(pTHX_ I32 type)
1899 {
1900     dVAR;
1901 #ifdef DEBUGGING
1902     if (DEBUG_T_TEST) {
1903         PerlIO_printf(Perl_debug_log, "### forced token:\n");
1904         tokereport(type, &NEXTVAL_NEXTTOKE);
1905     }
1906 #endif
1907 #ifdef PERL_MAD
1908     if (PL_curforce < 0)
1909         start_force(PL_lasttoke);
1910     PL_nexttoke[PL_curforce].next_type = type;
1911     if (PL_lex_state != LEX_KNOWNEXT)
1912         PL_lex_defer = PL_lex_state;
1913     PL_lex_state = LEX_KNOWNEXT;
1914     PL_lex_expect = PL_expect;
1915     PL_curforce = -1;
1916 #else
1917     PL_nexttype[PL_nexttoke] = type;
1918     PL_nexttoke++;
1919     if (PL_lex_state != LEX_KNOWNEXT) {
1920         PL_lex_defer = PL_lex_state;
1921         PL_lex_expect = PL_expect;
1922         PL_lex_state = LEX_KNOWNEXT;
1923     }
1924 #endif
1925 }
1926
1927 STATIC SV *
1928 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
1929 {
1930     dVAR;
1931     SV * const sv = newSVpvn_utf8(start, len,
1932                                   !IN_BYTES
1933                                   && UTF
1934                                   && !is_ascii_string((const U8*)start, len)
1935                                   && is_utf8_string((const U8*)start, len));
1936     return sv;
1937 }
1938
1939 /*
1940  * S_force_word
1941  * When the lexer knows the next thing is a word (for instance, it has
1942  * just seen -> and it knows that the next char is a word char, then
1943  * it calls S_force_word to stick the next word into the PL_nexttoke/val
1944  * lookahead.
1945  *
1946  * Arguments:
1947  *   char *start : buffer position (must be within PL_linestr)
1948  *   int token   : PL_next* will be this type of bare word (e.g., METHOD,WORD)
1949  *   int check_keyword : if true, Perl checks to make sure the word isn't
1950  *       a keyword (do this if the word is a label, e.g. goto FOO)
1951  *   int allow_pack : if true, : characters will also be allowed (require,
1952  *       use, etc. do this)
1953  *   int allow_initial_tick : used by the "sub" lexer only.
1954  */
1955
1956 STATIC char *
1957 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
1958 {
1959     dVAR;
1960     register char *s;
1961     STRLEN len;
1962
1963     PERL_ARGS_ASSERT_FORCE_WORD;
1964
1965     start = SKIPSPACE1(start);
1966     s = start;
1967     if (isIDFIRST_lazy_if(s,UTF) ||
1968         (allow_pack && *s == ':') ||
1969         (allow_initial_tick && *s == '\'') )
1970     {
1971         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
1972         if (check_keyword && keyword(PL_tokenbuf, len, 0))
1973             return start;
1974         start_force(PL_curforce);
1975         if (PL_madskills)
1976             curmad('X', newSVpvn(start,s-start));
1977         if (token == METHOD) {
1978             s = SKIPSPACE1(s);
1979             if (*s == '(')
1980                 PL_expect = XTERM;
1981             else {
1982                 PL_expect = XOPERATOR;
1983             }
1984         }
1985         if (PL_madskills)
1986             curmad('g', newSVpvs( "forced" ));
1987         NEXTVAL_NEXTTOKE.opval
1988             = (OP*)newSVOP(OP_CONST,0,
1989                            S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
1990         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
1991         force_next(token);
1992     }
1993     return s;
1994 }
1995
1996 /*
1997  * S_force_ident
1998  * Called when the lexer wants $foo *foo &foo etc, but the program
1999  * text only contains the "foo" portion.  The first argument is a pointer
2000  * to the "foo", and the second argument is the type symbol to prefix.
2001  * Forces the next token to be a "WORD".
2002  * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2003  */
2004
2005 STATIC void
2006 S_force_ident(pTHX_ register const char *s, int kind)
2007 {
2008     dVAR;
2009
2010     PERL_ARGS_ASSERT_FORCE_IDENT;
2011
2012     if (*s) {
2013         const STRLEN len = strlen(s);
2014         OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
2015         start_force(PL_curforce);
2016         NEXTVAL_NEXTTOKE.opval = o;
2017         force_next(WORD);
2018         if (kind) {
2019             o->op_private = OPpCONST_ENTERED;
2020             /* XXX see note in pp_entereval() for why we forgo typo
2021                warnings if the symbol must be introduced in an eval.
2022                GSAR 96-10-12 */
2023             gv_fetchpvn_flags(s, len,
2024                               PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2025                               : GV_ADD,
2026                               kind == '$' ? SVt_PV :
2027                               kind == '@' ? SVt_PVAV :
2028                               kind == '%' ? SVt_PVHV :
2029                               SVt_PVGV
2030                               );
2031         }
2032     }
2033 }
2034
2035 NV
2036 Perl_str_to_version(pTHX_ SV *sv)
2037 {
2038     NV retval = 0.0;
2039     NV nshift = 1.0;
2040     STRLEN len;
2041     const char *start = SvPV_const(sv,len);
2042     const char * const end = start + len;
2043     const bool utf = SvUTF8(sv) ? TRUE : FALSE;
2044
2045     PERL_ARGS_ASSERT_STR_TO_VERSION;
2046
2047     while (start < end) {
2048         STRLEN skip;
2049         UV n;
2050         if (utf)
2051             n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2052         else {
2053             n = *(U8*)start;
2054             skip = 1;
2055         }
2056         retval += ((NV)n)/nshift;
2057         start += skip;
2058         nshift *= 1000;
2059     }
2060     return retval;
2061 }
2062
2063 /*
2064  * S_force_version
2065  * Forces the next token to be a version number.
2066  * If the next token appears to be an invalid version number, (e.g. "v2b"),
2067  * and if "guessing" is TRUE, then no new token is created (and the caller
2068  * must use an alternative parsing method).
2069  */
2070
2071 STATIC char *
2072 S_force_version(pTHX_ char *s, int guessing)
2073 {
2074     dVAR;
2075     OP *version = NULL;
2076     char *d;
2077 #ifdef PERL_MAD
2078     I32 startoff = s - SvPVX(PL_linestr);
2079 #endif
2080
2081     PERL_ARGS_ASSERT_FORCE_VERSION;
2082
2083     s = SKIPSPACE1(s);
2084
2085     d = s;
2086     if (*d == 'v')
2087         d++;
2088     if (isDIGIT(*d)) {
2089         while (isDIGIT(*d) || *d == '_' || *d == '.')
2090             d++;
2091 #ifdef PERL_MAD
2092         if (PL_madskills) {
2093             start_force(PL_curforce);
2094             curmad('X', newSVpvn(s,d-s));
2095         }
2096 #endif
2097         if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
2098             SV *ver;
2099             s = scan_num(s, &pl_yylval);
2100             version = pl_yylval.opval;
2101             ver = cSVOPx(version)->op_sv;
2102             if (SvPOK(ver) && !SvNIOK(ver)) {
2103                 SvUPGRADE(ver, SVt_PVNV);
2104                 SvNV_set(ver, str_to_version(ver));
2105                 SvNOK_on(ver);          /* hint that it is a version */
2106             }
2107         }
2108         else if (guessing) {
2109 #ifdef PERL_MAD
2110             if (PL_madskills) {
2111                 sv_free(PL_nextwhite);  /* let next token collect whitespace */
2112                 PL_nextwhite = 0;
2113                 s = SvPVX(PL_linestr) + startoff;
2114             }
2115 #endif
2116             return s;
2117         }
2118     }
2119
2120 #ifdef PERL_MAD
2121     if (PL_madskills && !version) {
2122         sv_free(PL_nextwhite);  /* let next token collect whitespace */
2123         PL_nextwhite = 0;
2124         s = SvPVX(PL_linestr) + startoff;
2125     }
2126 #endif
2127     /* NOTE: The parser sees the package name and the VERSION swapped */
2128     start_force(PL_curforce);
2129     NEXTVAL_NEXTTOKE.opval = version;
2130     force_next(WORD);
2131
2132     return s;
2133 }
2134
2135 /*
2136  * S_tokeq
2137  * Tokenize a quoted string passed in as an SV.  It finds the next
2138  * chunk, up to end of string or a backslash.  It may make a new
2139  * SV containing that chunk (if HINT_NEW_STRING is on).  It also
2140  * turns \\ into \.
2141  */
2142
2143 STATIC SV *
2144 S_tokeq(pTHX_ SV *sv)
2145 {
2146     dVAR;
2147     register char *s;
2148     register char *send;
2149     register char *d;
2150     STRLEN len = 0;
2151     SV *pv = sv;
2152
2153     PERL_ARGS_ASSERT_TOKEQ;
2154
2155     if (!SvLEN(sv))
2156         goto finish;
2157
2158     s = SvPV_force(sv, len);
2159     if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
2160         goto finish;
2161     send = s + len;
2162     while (s < send && *s != '\\')
2163         s++;
2164     if (s == send)
2165         goto finish;
2166     d = s;
2167     if ( PL_hints & HINT_NEW_STRING ) {
2168         pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
2169     }
2170     while (s < send) {
2171         if (*s == '\\') {
2172             if (s + 1 < send && (s[1] == '\\'))
2173                 s++;            /* all that, just for this */
2174         }
2175         *d++ = *s++;
2176     }
2177     *d = '\0';
2178     SvCUR_set(sv, d - SvPVX_const(sv));
2179   finish:
2180     if ( PL_hints & HINT_NEW_STRING )
2181        return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2182     return sv;
2183 }
2184
2185 /*
2186  * Now come three functions related to double-quote context,
2187  * S_sublex_start, S_sublex_push, and S_sublex_done.  They're used when
2188  * converting things like "\u\Lgnat" into ucfirst(lc("gnat")).  They
2189  * interact with PL_lex_state, and create fake ( ... ) argument lists
2190  * to handle functions and concatenation.
2191  * They assume that whoever calls them will be setting up a fake
2192  * join call, because each subthing puts a ',' after it.  This lets
2193  *   "lower \luPpEr"
2194  * become
2195  *  join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
2196  *
2197  * (I'm not sure whether the spurious commas at the end of lcfirst's
2198  * arguments and join's arguments are created or not).
2199  */
2200
2201 /*
2202  * S_sublex_start
2203  * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2204  *
2205  * Pattern matching will set PL_lex_op to the pattern-matching op to
2206  * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2207  *
2208  * OP_CONST and OP_READLINE are easy--just make the new op and return.
2209  *
2210  * Everything else becomes a FUNC.
2211  *
2212  * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2213  * had an OP_CONST or OP_READLINE).  This just sets us up for a
2214  * call to S_sublex_push().
2215  */
2216
2217 STATIC I32
2218 S_sublex_start(pTHX)
2219 {
2220     dVAR;
2221     register const I32 op_type = pl_yylval.ival;
2222
2223     if (op_type == OP_NULL) {
2224         pl_yylval.opval = PL_lex_op;
2225         PL_lex_op = NULL;
2226         return THING;
2227     }
2228     if (op_type == OP_CONST || op_type == OP_READLINE) {
2229         SV *sv = tokeq(PL_lex_stuff);
2230
2231         if (SvTYPE(sv) == SVt_PVIV) {
2232             /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2233             STRLEN len;
2234             const char * const p = SvPV_const(sv, len);
2235             SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2236             SvREFCNT_dec(sv);
2237             sv = nsv;
2238         }
2239         pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
2240         PL_lex_stuff = NULL;
2241         /* Allow <FH> // "foo" */
2242         if (op_type == OP_READLINE)
2243             PL_expect = XTERMORDORDOR;
2244         return THING;
2245     }
2246     else if (op_type == OP_BACKTICK && PL_lex_op) {
2247         /* readpipe() vas overriden */
2248         cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
2249         pl_yylval.opval = PL_lex_op;
2250         PL_lex_op = NULL;
2251         PL_lex_stuff = NULL;
2252         return THING;
2253     }
2254
2255     PL_sublex_info.super_state = PL_lex_state;
2256     PL_sublex_info.sub_inwhat = (U16)op_type;
2257     PL_sublex_info.sub_op = PL_lex_op;
2258     PL_lex_state = LEX_INTERPPUSH;
2259
2260     PL_expect = XTERM;
2261     if (PL_lex_op) {
2262         pl_yylval.opval = PL_lex_op;
2263         PL_lex_op = NULL;
2264         return PMFUNC;
2265     }
2266     else
2267         return FUNC;
2268 }
2269
2270 /*
2271  * S_sublex_push
2272  * Create a new scope to save the lexing state.  The scope will be
2273  * ended in S_sublex_done.  Returns a '(', starting the function arguments
2274  * to the uc, lc, etc. found before.
2275  * Sets PL_lex_state to LEX_INTERPCONCAT.
2276  */
2277
2278 STATIC I32
2279 S_sublex_push(pTHX)
2280 {
2281     dVAR;
2282     ENTER;
2283
2284     PL_lex_state = PL_sublex_info.super_state;
2285     SAVEBOOL(PL_lex_dojoin);
2286     SAVEI32(PL_lex_brackets);
2287     SAVEI32(PL_lex_casemods);
2288     SAVEI32(PL_lex_starts);
2289     SAVEI8(PL_lex_state);
2290     SAVEVPTR(PL_lex_inpat);
2291     SAVEI16(PL_lex_inwhat);
2292     SAVECOPLINE(PL_curcop);
2293     SAVEPPTR(PL_bufptr);
2294     SAVEPPTR(PL_bufend);
2295     SAVEPPTR(PL_oldbufptr);
2296     SAVEPPTR(PL_oldoldbufptr);
2297     SAVEPPTR(PL_last_lop);
2298     SAVEPPTR(PL_last_uni);
2299     SAVEPPTR(PL_linestart);
2300     SAVESPTR(PL_linestr);
2301     SAVEGENERICPV(PL_lex_brackstack);
2302     SAVEGENERICPV(PL_lex_casestack);
2303
2304     PL_linestr = PL_lex_stuff;
2305     PL_lex_stuff = NULL;
2306
2307     PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2308         = SvPVX(PL_linestr);
2309     PL_bufend += SvCUR(PL_linestr);
2310     PL_last_lop = PL_last_uni = NULL;
2311     SAVEFREESV(PL_linestr);
2312
2313     PL_lex_dojoin = FALSE;
2314     PL_lex_brackets = 0;
2315     Newx(PL_lex_brackstack, 120, char);
2316     Newx(PL_lex_casestack, 12, char);
2317     PL_lex_casemods = 0;
2318     *PL_lex_casestack = '\0';
2319     PL_lex_starts = 0;
2320     PL_lex_state = LEX_INTERPCONCAT;
2321     CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2322
2323     PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2324     if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2325         PL_lex_inpat = PL_sublex_info.sub_op;
2326     else
2327         PL_lex_inpat = NULL;
2328
2329     return '(';
2330 }
2331
2332 /*
2333  * S_sublex_done
2334  * Restores lexer state after a S_sublex_push.
2335  */
2336
2337 STATIC I32
2338 S_sublex_done(pTHX)
2339 {
2340     dVAR;
2341     if (!PL_lex_starts++) {
2342         SV * const sv = newSVpvs("");
2343         if (SvUTF8(PL_linestr))
2344             SvUTF8_on(sv);
2345         PL_expect = XOPERATOR;
2346         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2347         return THING;
2348     }
2349
2350     if (PL_lex_casemods) {              /* oops, we've got some unbalanced parens */
2351         PL_lex_state = LEX_INTERPCASEMOD;
2352         return yylex();
2353     }
2354
2355     /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2356     if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2357         PL_linestr = PL_lex_repl;
2358         PL_lex_inpat = 0;
2359         PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2360         PL_bufend += SvCUR(PL_linestr);
2361         PL_last_lop = PL_last_uni = NULL;
2362         SAVEFREESV(PL_linestr);
2363         PL_lex_dojoin = FALSE;
2364         PL_lex_brackets = 0;
2365         PL_lex_casemods = 0;
2366         *PL_lex_casestack = '\0';
2367         PL_lex_starts = 0;
2368         if (SvEVALED(PL_lex_repl)) {
2369             PL_lex_state = LEX_INTERPNORMAL;
2370             PL_lex_starts++;
2371             /*  we don't clear PL_lex_repl here, so that we can check later
2372                 whether this is an evalled subst; that means we rely on the
2373                 logic to ensure sublex_done() is called again only via the
2374                 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2375         }
2376         else {
2377             PL_lex_state = LEX_INTERPCONCAT;
2378             PL_lex_repl = NULL;
2379         }
2380         return ',';
2381     }
2382     else {
2383 #ifdef PERL_MAD
2384         if (PL_madskills) {
2385             if (PL_thiswhite) {
2386                 if (!PL_endwhite)
2387                     PL_endwhite = newSVpvs("");
2388                 sv_catsv(PL_endwhite, PL_thiswhite);
2389                 PL_thiswhite = 0;
2390             }
2391             if (PL_thistoken)
2392                 sv_setpvs(PL_thistoken,"");
2393             else
2394                 PL_realtokenstart = -1;
2395         }
2396 #endif
2397         LEAVE;
2398         PL_bufend = SvPVX(PL_linestr);
2399         PL_bufend += SvCUR(PL_linestr);
2400         PL_expect = XOPERATOR;
2401         PL_sublex_info.sub_inwhat = 0;
2402         return ')';
2403     }
2404 }
2405
2406 /*
2407   scan_const
2408
2409   Extracts a pattern, double-quoted string, or transliteration.  This
2410   is terrifying code.
2411
2412   It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2413   processing a pattern (PL_lex_inpat is true), a transliteration
2414   (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2415
2416   Returns a pointer to the character scanned up to. If this is
2417   advanced from the start pointer supplied (i.e. if anything was
2418   successfully parsed), will leave an OP for the substring scanned
2419   in pl_yylval. Caller must intuit reason for not parsing further
2420   by looking at the next characters herself.
2421
2422   In patterns:
2423     backslashes:
2424       double-quoted style: \r and \n
2425       regexp special ones: \D \s
2426       constants: \x31
2427       backrefs: \1
2428       case and quoting: \U \Q \E
2429     stops on @ and $, but not for $ as tail anchor
2430
2431   In transliterations:
2432     characters are VERY literal, except for - not at the start or end
2433     of the string, which indicates a range. If the range is in bytes,
2434     scan_const expands the range to the full set of intermediate
2435     characters. If the range is in utf8, the hyphen is replaced with
2436     a certain range mark which will be handled by pmtrans() in op.c.
2437
2438   In double-quoted strings:
2439     backslashes:
2440       double-quoted style: \r and \n
2441       constants: \x31
2442       deprecated backrefs: \1 (in substitution replacements)
2443       case and quoting: \U \Q \E
2444     stops on @ and $
2445
2446   scan_const does *not* construct ops to handle interpolated strings.
2447   It stops processing as soon as it finds an embedded $ or @ variable
2448   and leaves it to the caller to work out what's going on.
2449
2450   embedded arrays (whether in pattern or not) could be:
2451       @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2452
2453   $ in double-quoted strings must be the symbol of an embedded scalar.
2454
2455   $ in pattern could be $foo or could be tail anchor.  Assumption:
2456   it's a tail anchor if $ is the last thing in the string, or if it's
2457   followed by one of "()| \r\n\t"
2458
2459   \1 (backreferences) are turned into $1
2460
2461   The structure of the code is
2462       while (there's a character to process) {
2463           handle transliteration ranges
2464           skip regexp comments /(?#comment)/ and codes /(?{code})/
2465           skip #-initiated comments in //x patterns
2466           check for embedded arrays
2467           check for embedded scalars
2468           if (backslash) {
2469               leave intact backslashes from leaveit (below)
2470               deprecate \1 in substitution replacements
2471               handle string-changing backslashes \l \U \Q \E, etc.
2472               switch (what was escaped) {
2473                   handle \- in a transliteration (becomes a literal -)
2474                   handle \132 (octal characters)
2475                   handle \x15 and \x{1234} (hex characters)
2476                   handle \N{name} (named characters)
2477                   handle \cV (control characters)
2478                   handle printf-style backslashes (\f, \r, \n, etc)
2479               } (end switch)
2480               continue
2481           } (end if backslash)
2482           handle regular character
2483     } (end while character to read)
2484
2485 */
2486
2487 STATIC char *
2488 S_scan_const(pTHX_ char *start)
2489 {
2490     dVAR;
2491     register char *send = PL_bufend;            /* end of the constant */
2492     SV *sv = newSV(send - start);               /* sv for the constant.  See
2493                                                    note below on sizing. */
2494     register char *s = start;                   /* start of the constant */
2495     register char *d = SvPVX(sv);               /* destination for copies */
2496     bool dorange = FALSE;                       /* are we in a translit range? */
2497     bool didrange = FALSE;                      /* did we just finish a range? */
2498     I32  has_utf8 = FALSE;                      /* Output constant is UTF8 */
2499     I32  this_utf8 = UTF;                       /* Is the source string assumed
2500                                                    to be UTF8?  But, this can
2501                                                    show as true when the source
2502                                                    isn't utf8, as for example
2503                                                    when it is entirely composed
2504                                                    of hex constants */
2505
2506     /* Note on sizing:  The scanned constant is placed into sv, which is
2507      * initialized by newSV() assuming one byte of output for every byte of
2508      * input.  This routine expects newSV() to allocate an extra byte for a
2509      * trailing NUL, which this routine will append if it gets to the end of
2510      * the input.  There may be more bytes of input than output (eg., \N{LATIN
2511      * CAPITAL LETTER A}), or more output than input if the constant ends up
2512      * recoded to utf8, but each time a construct is found that might increase
2513      * the needed size, SvGROW() is called.  Its size parameter each time is
2514      * based on the best guess estimate at the time, namely the length used so
2515      * far, plus the length the current construct will occupy, plus room for
2516      * the trailing NUL, plus one byte for every input byte still unscanned */
2517
2518     UV uv;
2519 #ifdef EBCDIC
2520     UV literal_endpoint = 0;
2521     bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
2522 #endif
2523
2524     PERL_ARGS_ASSERT_SCAN_CONST;
2525
2526     if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2527         /* If we are doing a trans and we know we want UTF8 set expectation */
2528         has_utf8   = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2529         this_utf8  = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2530     }
2531
2532
2533     while (s < send || dorange) {
2534         /* get transliterations out of the way (they're most literal) */
2535         if (PL_lex_inwhat == OP_TRANS) {
2536             /* expand a range A-Z to the full set of characters.  AIE! */
2537             if (dorange) {
2538                 I32 i;                          /* current expanded character */
2539                 I32 min;                        /* first character in range */
2540                 I32 max;                        /* last character in range */
2541
2542 #ifdef EBCDIC
2543                 UV uvmax = 0;
2544 #endif
2545
2546                 if (has_utf8
2547 #ifdef EBCDIC
2548                     && !native_range
2549 #endif
2550                     ) {
2551                     char * const c = (char*)utf8_hop((U8*)d, -1);
2552                     char *e = d++;
2553                     while (e-- > c)
2554                         *(e + 1) = *e;
2555                     *c = (char)UTF_TO_NATIVE(0xff);
2556                     /* mark the range as done, and continue */
2557                     dorange = FALSE;
2558                     didrange = TRUE;
2559                     continue;
2560                 }
2561
2562                 i = d - SvPVX_const(sv);                /* remember current offset */
2563 #ifdef EBCDIC
2564                 SvGROW(sv,
2565                        SvLEN(sv) + (has_utf8 ?
2566                                     (512 - UTF_CONTINUATION_MARK +
2567                                      UNISKIP(0x100))
2568                                     : 256));
2569                 /* How many two-byte within 0..255: 128 in UTF-8,
2570                  * 96 in UTF-8-mod. */
2571 #else
2572                 SvGROW(sv, SvLEN(sv) + 256);    /* never more than 256 chars in a range */
2573 #endif
2574                 d = SvPVX(sv) + i;              /* refresh d after realloc */
2575 #ifdef EBCDIC
2576                 if (has_utf8) {
2577                     int j;
2578                     for (j = 0; j <= 1; j++) {
2579                         char * const c = (char*)utf8_hop((U8*)d, -1);
2580                         const UV uv    = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2581                         if (j)
2582                             min = (U8)uv;
2583                         else if (uv < 256)
2584                             max = (U8)uv;
2585                         else {
2586                             max = (U8)0xff; /* only to \xff */
2587                             uvmax = uv; /* \x{100} to uvmax */
2588                         }
2589                         d = c; /* eat endpoint chars */
2590                      }
2591                 }
2592                else {
2593 #endif
2594                    d -= 2;              /* eat the first char and the - */
2595                    min = (U8)*d;        /* first char in range */
2596                    max = (U8)d[1];      /* last char in range  */
2597 #ifdef EBCDIC
2598                }
2599 #endif
2600
2601                 if (min > max) {
2602                     Perl_croak(aTHX_
2603                                "Invalid range \"%c-%c\" in transliteration operator",
2604                                (char)min, (char)max);
2605                 }
2606
2607 #ifdef EBCDIC
2608                 if (literal_endpoint == 2 &&
2609                     ((isLOWER(min) && isLOWER(max)) ||
2610                      (isUPPER(min) && isUPPER(max)))) {
2611                     if (isLOWER(min)) {
2612                         for (i = min; i <= max; i++)
2613                             if (isLOWER(i))
2614                                 *d++ = NATIVE_TO_NEED(has_utf8,i);
2615                     } else {
2616                         for (i = min; i <= max; i++)
2617                             if (isUPPER(i))
2618                                 *d++ = NATIVE_TO_NEED(has_utf8,i);
2619                     }
2620                 }
2621                 else
2622 #endif
2623                     for (i = min; i <= max; i++)
2624 #ifdef EBCDIC
2625                         if (has_utf8) {
2626                             const U8 ch = (U8)NATIVE_TO_UTF(i);
2627                             if (UNI_IS_INVARIANT(ch))
2628                                 *d++ = (U8)i;
2629                             else {
2630                                 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2631                                 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2632                             }
2633                         }
2634                         else
2635 #endif
2636                             *d++ = (char)i;
2637
2638 #ifdef EBCDIC
2639                 if (uvmax) {
2640                     d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2641                     if (uvmax > 0x101)
2642                         *d++ = (char)UTF_TO_NATIVE(0xff);
2643                     if (uvmax > 0x100)
2644                         d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2645                 }
2646 #endif
2647
2648                 /* mark the range as done, and continue */
2649                 dorange = FALSE;
2650                 didrange = TRUE;
2651 #ifdef EBCDIC
2652                 literal_endpoint = 0;
2653 #endif
2654                 continue;
2655             }
2656
2657             /* range begins (ignore - as first or last char) */
2658             else if (*s == '-' && s+1 < send  && s != start) {
2659                 if (didrange) {
2660                     Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
2661                 }
2662                 if (has_utf8
2663 #ifdef EBCDIC
2664                     && !native_range
2665 #endif
2666                     ) {
2667                     *d++ = (char)UTF_TO_NATIVE(0xff);   /* use illegal utf8 byte--see pmtrans */
2668                     s++;
2669                     continue;
2670                 }
2671                 dorange = TRUE;
2672                 s++;
2673             }
2674             else {
2675                 didrange = FALSE;
2676 #ifdef EBCDIC
2677                 literal_endpoint = 0;
2678                 native_range = TRUE;
2679 #endif
2680             }
2681         }
2682
2683         /* if we get here, we're not doing a transliteration */
2684
2685         /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2686            except for the last char, which will be done separately. */
2687         else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
2688             if (s[2] == '#') {
2689                 while (s+1 < send && *s != ')')
2690                     *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2691             }
2692             else if (s[2] == '{' /* This should match regcomp.c */
2693                     || (s[2] == '?' && s[3] == '{'))
2694             {
2695                 I32 count = 1;
2696                 char *regparse = s + (s[2] == '{' ? 3 : 4);
2697                 char c;
2698
2699                 while (count && (c = *regparse)) {
2700                     if (c == '\\' && regparse[1])
2701                         regparse++;
2702                     else if (c == '{')
2703                         count++;
2704                     else if (c == '}')
2705                         count--;
2706                     regparse++;
2707                 }
2708                 if (*regparse != ')')
2709                     regparse--;         /* Leave one char for continuation. */
2710                 while (s < regparse)
2711                     *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2712             }
2713         }
2714
2715         /* likewise skip #-initiated comments in //x patterns */
2716         else if (*s == '#' && PL_lex_inpat &&
2717           ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
2718             while (s+1 < send && *s != '\n')
2719                 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2720         }
2721
2722         /* check for embedded arrays
2723            (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
2724            */
2725         else if (*s == '@' && s[1]) {
2726             if (isALNUM_lazy_if(s+1,UTF))
2727                 break;
2728             if (strchr(":'{$", s[1]))
2729                 break;
2730             if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2731                 break; /* in regexp, neither @+ nor @- are interpolated */
2732         }
2733
2734         /* check for embedded scalars.  only stop if we're sure it's a
2735            variable.
2736         */
2737         else if (*s == '$') {
2738             if (!PL_lex_inpat)  /* not a regexp, so $ must be var */
2739                 break;
2740             if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
2741                 if (s[1] == '\\') {
2742                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2743                                    "Possible unintended interpolation of $\\ in regex");
2744                 }
2745                 break;          /* in regexp, $ might be tail anchor */
2746             }
2747         }
2748
2749         /* End of else if chain - OP_TRANS rejoin rest */
2750
2751         /* backslashes */
2752         if (*s == '\\' && s+1 < send) {
2753             s++;
2754
2755             /* deprecate \1 in strings and substitution replacements */
2756             if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
2757                 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
2758             {
2759                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
2760                 *--s = '$';
2761                 break;
2762             }
2763
2764             /* string-change backslash escapes */
2765             if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
2766                 --s;
2767                 break;
2768             }
2769             /* skip any other backslash escapes in a pattern */
2770             else if (PL_lex_inpat) {
2771                 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2772                 goto default_action;
2773             }
2774
2775             /* if we get here, it's either a quoted -, or a digit */
2776             switch (*s) {
2777
2778             /* quoted - in transliterations */
2779             case '-':
2780                 if (PL_lex_inwhat == OP_TRANS) {
2781                     *d++ = *s++;
2782                     continue;
2783                 }
2784                 /* FALL THROUGH */
2785             default:
2786                 {
2787                     if ((isALPHA(*s) || isDIGIT(*s)))
2788                         Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
2789                                        "Unrecognized escape \\%c passed through",
2790                                        *s);
2791                     /* default action is to copy the quoted character */
2792                     goto default_action;
2793                 }
2794
2795             /* eg. \132 indicates the octal constant 0x132 */
2796             case '0': case '1': case '2': case '3':
2797             case '4': case '5': case '6': case '7':
2798                 {
2799                     I32 flags = 0;
2800                     STRLEN len = 3;
2801                     uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
2802                     s += len;
2803                 }
2804                 goto NUM_ESCAPE_INSERT;
2805
2806             /* eg. \x24 indicates the hex constant 0x24 */
2807             case 'x':
2808                 ++s;
2809                 if (*s == '{') {
2810                     char* const e = strchr(s, '}');
2811                     I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2812                       PERL_SCAN_DISALLOW_PREFIX;
2813                     STRLEN len;
2814
2815                     ++s;
2816                     if (!e) {
2817                         yyerror("Missing right brace on \\x{}");
2818                         continue;
2819                     }
2820                     len = e - s;
2821                     uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
2822                     s = e + 1;
2823                 }
2824                 else {
2825                     {
2826                         STRLEN len = 2;
2827                         I32 flags = PERL_SCAN_DISALLOW_PREFIX;
2828                         uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
2829                         s += len;
2830                     }
2831                 }
2832
2833               NUM_ESCAPE_INSERT:
2834                 /* Insert oct, hex, or \N{U+...} escaped character.  There will
2835                  * always be enough room in sv since such escapes will be
2836                  * longer than any UTF-8 sequence they can end up as, except if
2837                  * they force us to recode the rest of the string into utf8 */
2838
2839                 /* Here uv is the ordinal of the next character being added in
2840                  * unicode (converted from native).  (It has to be done before
2841                  * here because \N is interpreted as unicode, and oct and hex
2842                  * as native.) */
2843                 if (!UNI_IS_INVARIANT(uv)) {
2844                     if (!has_utf8 && uv > 255) {
2845                         /* Might need to recode whatever we have accumulated so
2846                          * far if it contains any chars variant in utf8 or
2847                          * utf-ebcdic. */
2848
2849                         SvCUR_set(sv, d - SvPVX_const(sv));
2850                         SvPOK_on(sv);
2851                         *d = '\0';
2852                         /* See Note on sizing above.  */
2853                         sv_utf8_upgrade_flags_grow(sv,
2854                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
2855                                         UNISKIP(uv) + (STRLEN)(send - s) + 1);
2856                         d = SvPVX(sv) + SvCUR(sv);
2857                         has_utf8 = TRUE;
2858                     }
2859
2860                     if (has_utf8) {
2861                         d = (char*)uvuni_to_utf8((U8*)d, uv);
2862                         if (PL_lex_inwhat == OP_TRANS &&
2863                             PL_sublex_info.sub_op) {
2864                             PL_sublex_info.sub_op->op_private |=
2865                                 (PL_lex_repl ? OPpTRANS_FROM_UTF
2866                                              : OPpTRANS_TO_UTF);
2867                         }
2868 #ifdef EBCDIC
2869                         if (uv > 255 && !dorange)
2870                             native_range = FALSE;
2871 #endif
2872                     }
2873                     else {
2874                         *d++ = (char)uv;
2875                     }
2876                 }
2877                 else {
2878                     *d++ = (char) uv;
2879                 }
2880                 continue;
2881
2882             /* \N{LATIN SMALL LETTER A} is a named character, and so is
2883              * \N{U+0041} */
2884             case 'N':
2885                 ++s;
2886                 if (*s == '{') {
2887                     char* e = strchr(s, '}');
2888                     SV *res;
2889                     STRLEN len;
2890                     const char *str;
2891
2892                     if (!e) {
2893                         yyerror("Missing right brace on \\N{}");
2894                         e = s - 1;
2895                         goto cont_scan;
2896                     }
2897                     if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
2898                         /* \N{U+...} The ... is a unicode value even on EBCDIC
2899                          * machines */
2900                         I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2901                           PERL_SCAN_DISALLOW_PREFIX;
2902                         s += 3;
2903                         len = e - s;
2904                         uv = grok_hex(s, &len, &flags, NULL);
2905                         if ( e > s && len != (STRLEN)(e - s) ) {
2906                             uv = 0xFFFD;
2907                         }
2908                         s = e + 1;
2909                         goto NUM_ESCAPE_INSERT;
2910                     }
2911                     res = newSVpvn(s + 1, e - s - 1);
2912                     res = new_constant( NULL, 0, "charnames",
2913                                         res, NULL, s - 2, e - s + 3 );
2914                     if (has_utf8)
2915                         sv_utf8_upgrade(res);
2916                     str = SvPV_const(res,len);
2917 #ifdef EBCDIC_NEVER_MIND
2918                     /* charnames uses pack U and that has been
2919                      * recently changed to do the below uni->native
2920                      * mapping, so this would be redundant (and wrong,
2921                      * the code point would be doubly converted).
2922                      * But leave this in just in case the pack U change
2923                      * gets revoked, but the semantics is still
2924                      * desireable for charnames. --jhi */
2925                     {
2926                          UV uv = utf8_to_uvchr((const U8*)str, 0);
2927
2928                          if (uv < 0x100) {
2929                               U8 tmpbuf[UTF8_MAXBYTES+1], *d;
2930
2931                               d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
2932                               sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
2933                               str = SvPV_const(res, len);
2934                          }
2935                     }
2936 #endif
2937                     /* If destination is not in utf8 but this new character is,
2938                      * recode the dest to utf8 */
2939                     if (!has_utf8 && SvUTF8(res)) {
2940                         SvCUR_set(sv, d - SvPVX_const(sv));
2941                         SvPOK_on(sv);
2942                         *d = '\0';
2943                         /* See Note on sizing above.  */
2944                         sv_utf8_upgrade_flags_grow(sv,
2945                                             SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
2946                                             len + (STRLEN)(send - s) + 1);
2947                         d = SvPVX(sv) + SvCUR(sv);
2948                         has_utf8 = TRUE;
2949                     } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
2950
2951                         /* See Note on sizing above.  (NOTE: SvCUR() is not set
2952                          * correctly here). */
2953                         const STRLEN off = d - SvPVX_const(sv);
2954                         d = SvGROW(sv, off + len + (STRLEN)(send - s) + 1) + off;
2955                     }
2956 #ifdef EBCDIC
2957                     if (!dorange)
2958                         native_range = FALSE; /* \N{} is guessed to be Unicode */
2959 #endif
2960                     Copy(str, d, len, char);
2961                     d += len;
2962                     SvREFCNT_dec(res);
2963                   cont_scan:
2964                     s = e + 1;
2965                 }
2966                 else
2967                     yyerror("Missing braces on \\N{}");
2968                 continue;
2969
2970             /* \c is a control character */
2971             case 'c':
2972                 s++;
2973                 if (s < send) {
2974                     U8 c = *s++;
2975 #ifdef EBCDIC
2976                     if (isLOWER(c))
2977                         c = toUPPER(c);
2978 #endif
2979                     *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
2980                 }
2981                 else {
2982                     yyerror("Missing control char name in \\c");
2983                 }
2984                 continue;
2985
2986             /* printf-style backslashes, formfeeds, newlines, etc */
2987             case 'b':
2988                 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
2989                 break;
2990             case 'n':
2991                 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
2992                 break;
2993             case 'r':
2994                 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
2995                 break;
2996             case 'f':
2997                 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
2998                 break;
2999             case 't':
3000                 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
3001                 break;
3002             case 'e':
3003                 *d++ = ASCII_TO_NEED(has_utf8,'\033');
3004                 break;
3005             case 'a':
3006                 *d++ = ASCII_TO_NEED(has_utf8,'\007');
3007                 break;
3008             } /* end switch */
3009
3010             s++;
3011             continue;
3012         } /* end if (backslash) */
3013 #ifdef EBCDIC
3014         else
3015             literal_endpoint++;
3016 #endif
3017
3018     default_action:
3019         /* If we started with encoded form, or already know we want it,
3020            then encode the next character */
3021         if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
3022             STRLEN len  = 1;
3023
3024
3025             /* One might think that it is wasted effort in the case of the
3026              * source being utf8 (this_utf8 == TRUE) to take the next character
3027              * in the source, convert it to an unsigned value, and then convert
3028              * it back again.  But the source has not been validated here.  The
3029              * routine that does the conversion checks for errors like
3030              * malformed utf8 */
3031
3032             const UV nextuv   = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3033             const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
3034             if (!has_utf8) {
3035                 SvCUR_set(sv, d - SvPVX_const(sv));
3036                 SvPOK_on(sv);
3037                 *d = '\0';
3038                 /* See Note on sizing above.  */
3039                 sv_utf8_upgrade_flags_grow(sv,
3040                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3041                                         need + (STRLEN)(send - s) + 1);
3042                 d = SvPVX(sv) + SvCUR(sv);
3043                 has_utf8 = TRUE;
3044             } else if (need > len) {
3045                 /* encoded value larger than old, may need extra space (NOTE:
3046                  * SvCUR() is not set correctly here).   See Note on sizing
3047                  * above.  */
3048                 const STRLEN off = d - SvPVX_const(sv);
3049                 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
3050             }
3051             s += len;
3052
3053             d = (char*)uvchr_to_utf8((U8*)d, nextuv);
3054 #ifdef EBCDIC
3055             if (uv > 255 && !dorange)
3056                 native_range = FALSE;
3057 #endif
3058         }
3059         else {
3060             *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3061         }
3062     } /* while loop to process each character */
3063
3064     /* terminate the string and set up the sv */
3065     *d = '\0';
3066     SvCUR_set(sv, d - SvPVX_const(sv));
3067     if (SvCUR(sv) >= SvLEN(sv))
3068         Perl_croak(aTHX_ "panic: constant overflowed allocated space");
3069
3070     SvPOK_on(sv);
3071     if (PL_encoding && !has_utf8) {
3072         sv_recode_to_utf8(sv, PL_encoding);
3073         if (SvUTF8(sv))
3074             has_utf8 = TRUE;
3075     }
3076     if (has_utf8) {
3077         SvUTF8_on(sv);
3078         if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3079             PL_sublex_info.sub_op->op_private |=
3080                     (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3081         }
3082     }
3083
3084     /* shrink the sv if we allocated more than we used */
3085     if (SvCUR(sv) + 5 < SvLEN(sv)) {
3086         SvPV_shrink_to_cur(sv);
3087     }
3088
3089     /* return the substring (via pl_yylval) only if we parsed anything */
3090     if (s > PL_bufptr) {
3091         if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
3092             const char *const key = PL_lex_inpat ? "qr" : "q";
3093             const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3094             const char *type;
3095             STRLEN typelen;
3096
3097             if (PL_lex_inwhat == OP_TRANS) {
3098                 type = "tr";
3099                 typelen = 2;
3100             } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3101                 type = "s";
3102                 typelen = 1;
3103             } else  {
3104                 type = "qq";
3105                 typelen = 2;
3106             }
3107
3108             sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3109                                 type, typelen);
3110         }
3111         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3112     } else
3113         SvREFCNT_dec(sv);
3114     return s;
3115 }
3116
3117 /* S_intuit_more
3118  * Returns TRUE if there's more to the expression (e.g., a subscript),
3119  * FALSE otherwise.
3120  *
3121  * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3122  *
3123  * ->[ and ->{ return TRUE
3124  * { and [ outside a pattern are always subscripts, so return TRUE
3125  * if we're outside a pattern and it's not { or [, then return FALSE
3126  * if we're in a pattern and the first char is a {
3127  *   {4,5} (any digits around the comma) returns FALSE
3128  * if we're in a pattern and the first char is a [
3129  *   [] returns FALSE
3130  *   [SOMETHING] has a funky algorithm to decide whether it's a
3131  *      character class or not.  It has to deal with things like
3132  *      /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3133  * anything else returns TRUE
3134  */
3135
3136 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
3137
3138 STATIC int
3139 S_intuit_more(pTHX_ register char *s)
3140 {
3141     dVAR;
3142
3143     PERL_ARGS_ASSERT_INTUIT_MORE;
3144
3145     if (PL_lex_brackets)
3146         return TRUE;
3147     if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3148         return TRUE;
3149     if (*s != '{' && *s != '[')
3150         return FALSE;
3151     if (!PL_lex_inpat)
3152         return TRUE;
3153
3154     /* In a pattern, so maybe we have {n,m}. */
3155     if (*s == '{') {
3156         s++;
3157         if (!isDIGIT(*s))
3158             return TRUE;
3159         while (isDIGIT(*s))
3160             s++;
3161         if (*s == ',')
3162             s++;
3163         while (isDIGIT(*s))
3164             s++;
3165         if (*s == '}')
3166             return FALSE;
3167         return TRUE;
3168
3169     }
3170
3171     /* On the other hand, maybe we have a character class */
3172
3173     s++;
3174     if (*s == ']' || *s == '^')
3175         return FALSE;
3176     else {
3177         /* this is terrifying, and it works */
3178         int weight = 2;         /* let's weigh the evidence */
3179         char seen[256];
3180         unsigned char un_char = 255, last_un_char;
3181         const char * const send = strchr(s,']');
3182         char tmpbuf[sizeof PL_tokenbuf * 4];
3183
3184         if (!send)              /* has to be an expression */
3185             return TRUE;
3186
3187         Zero(seen,256,char);
3188         if (*s == '$')
3189             weight -= 3;
3190         else if (isDIGIT(*s)) {
3191             if (s[1] != ']') {
3192                 if (isDIGIT(s[1]) && s[2] == ']')
3193                     weight -= 10;
3194             }
3195             else
3196                 weight -= 100;
3197         }
3198         for (; s < send; s++) {
3199             last_un_char = un_char;
3200             un_char = (unsigned char)*s;
3201             switch (*s) {
3202             case '@':
3203             case '&':
3204             case '$':
3205                 weight -= seen[un_char] * 10;
3206                 if (isALNUM_lazy_if(s+1,UTF)) {
3207                     int len;
3208                     scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
3209                     len = (int)strlen(tmpbuf);
3210                     if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
3211                         weight -= 100;
3212                     else
3213                         weight -= 10;
3214                 }
3215                 else if (*s == '$' && s[1] &&
3216                   strchr("[#!%*<>()-=",s[1])) {
3217                     if (/*{*/ strchr("])} =",s[2]))
3218                         weight -= 10;
3219                     else
3220                         weight -= 1;
3221                 }
3222                 break;
3223             case '\\':
3224                 un_char = 254;
3225                 if (s[1]) {
3226                     if (strchr("wds]",s[1]))
3227                         weight += 100;
3228                     else if (seen[(U8)'\''] || seen[(U8)'"'])
3229                         weight += 1;
3230                     else if (strchr("rnftbxcav",s[1]))
3231                         weight += 40;
3232                     else if (isDIGIT(s[1])) {
3233                         weight += 40;
3234                         while (s[1] && isDIGIT(s[1]))
3235                             s++;
3236                     }
3237                 }
3238                 else
3239                     weight += 100;
3240                 break;
3241             case '-':
3242                 if (s[1] == '\\')
3243                     weight += 50;
3244                 if (strchr("aA01! ",last_un_char))
3245                     weight += 30;
3246                 if (strchr("zZ79~",s[1]))
3247                     weight += 30;
3248                 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3249                     weight -= 5;        /* cope with negative subscript */
3250                 break;
3251             default:
3252                 if (!isALNUM(last_un_char)
3253                     && !(last_un_char == '$' || last_un_char == '@'
3254                          || last_un_char == '&')
3255                     && isALPHA(*s) && s[1] && isALPHA(s[1])) {
3256                     char *d = tmpbuf;
3257                     while (isALPHA(*s))
3258                         *d++ = *s++;
3259                     *d = '\0';
3260                     if (keyword(tmpbuf, d - tmpbuf, 0))
3261                         weight -= 150;
3262                 }
3263                 if (un_char == last_un_char + 1)
3264                     weight += 5;
3265                 weight -= seen[un_char];
3266                 break;
3267             }
3268             seen[un_char]++;
3269         }
3270         if (weight >= 0)        /* probably a character class */
3271             return FALSE;
3272     }
3273
3274     return TRUE;
3275 }
3276
3277 /*
3278  * S_intuit_method
3279  *
3280  * Does all the checking to disambiguate
3281  *   foo bar
3282  * between foo(bar) and bar->foo.  Returns 0 if not a method, otherwise
3283  * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3284  *
3285  * First argument is the stuff after the first token, e.g. "bar".
3286  *
3287  * Not a method if bar is a filehandle.
3288  * Not a method if foo is a subroutine prototyped to take a filehandle.
3289  * Not a method if it's really "Foo $bar"
3290  * Method if it's "foo $bar"
3291  * Not a method if it's really "print foo $bar"
3292  * Method if it's really "foo package::" (interpreted as package->foo)
3293  * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3294  * Not a method if bar is a filehandle or package, but is quoted with
3295  *   =>
3296  */
3297
3298 STATIC int
3299 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
3300 {
3301     dVAR;
3302     char *s = start + (*start == '$');
3303     char tmpbuf[sizeof PL_tokenbuf];
3304     STRLEN len;
3305     GV* indirgv;
3306 #ifdef PERL_MAD
3307     int soff;
3308 #endif
3309
3310     PERL_ARGS_ASSERT_INTUIT_METHOD;
3311
3312     if (gv) {
3313         if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
3314             return 0;
3315         if (cv) {
3316             if (SvPOK(cv)) {
3317                 const char *proto = SvPVX_const(cv);
3318                 if (proto) {
3319                     if (*proto == ';')
3320                         proto++;
3321                     if (*proto == '*')
3322                         return 0;
3323                 }
3324             }
3325         } else
3326             gv = NULL;
3327     }
3328     s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
3329     /* start is the beginning of the possible filehandle/object,
3330      * and s is the end of it
3331      * tmpbuf is a copy of it
3332      */
3333
3334     if (*start == '$') {
3335         if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3336                 isUPPER(*PL_tokenbuf))
3337             return 0;
3338 #ifdef PERL_MAD
3339         len = start - SvPVX(PL_linestr);
3340 #endif
3341         s = PEEKSPACE(s);
3342 #ifdef PERL_MAD
3343         start = SvPVX(PL_linestr) + len;
3344 #endif
3345         PL_bufptr = start;
3346         PL_expect = XREF;
3347         return *s == '(' ? FUNCMETH : METHOD;
3348     }
3349     if (!keyword(tmpbuf, len, 0)) {
3350         if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
3351             len -= 2;
3352             tmpbuf[len] = '\0';
3353 #ifdef PERL_MAD
3354             soff = s - SvPVX(PL_linestr);
3355 #endif
3356             goto bare_package;
3357         }
3358         indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
3359         if (indirgv && GvCVu(indirgv))
3360             return 0;
3361         /* filehandle or package name makes it a method */
3362         if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
3363 #ifdef PERL_MAD
3364             soff = s - SvPVX(PL_linestr);
3365 #endif
3366             s = PEEKSPACE(s);
3367             if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
3368                 return 0;       /* no assumptions -- "=>" quotes bearword */
3369       bare_package:
3370             start_force(PL_curforce);
3371             NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
3372                                                   S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
3373             NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
3374             if (PL_madskills)
3375                 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
3376             PL_expect = XTERM;
3377             force_next(WORD);
3378             PL_bufptr = s;
3379 #ifdef PERL_MAD
3380             PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
3381 #endif
3382             return *s == '(' ? FUNCMETH : METHOD;
3383         }
3384     }
3385     return 0;
3386 }
3387
3388 /* Encoded script support. filter_add() effectively inserts a
3389  * 'pre-processing' function into the current source input stream.
3390  * Note that the filter function only applies to the current source file
3391  * (e.g., it will not affect files 'require'd or 'use'd by this one).
3392  *
3393  * The datasv parameter (which may be NULL) can be used to pass
3394  * private data to this instance of the filter. The filter function
3395  * can recover the SV using the FILTER_DATA macro and use it to
3396  * store private buffers and state information.
3397  *
3398  * The supplied datasv parameter is upgraded to a PVIO type
3399  * and the IoDIRP/IoANY field is used to store the function pointer,
3400  * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
3401  * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
3402  * private use must be set using malloc'd pointers.
3403  */
3404
3405 SV *
3406 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
3407 {
3408     dVAR;
3409     if (!funcp)
3410         return NULL;
3411
3412     if (!PL_parser)
3413         return NULL;
3414
3415     if (!PL_rsfp_filters)
3416         PL_rsfp_filters = newAV();
3417     if (!datasv)
3418         datasv = newSV(0);
3419     SvUPGRADE(datasv, SVt_PVIO);
3420     IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
3421     IoFLAGS(datasv) |= IOf_FAKE_DIRP;
3422     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
3423                           FPTR2DPTR(void *, IoANY(datasv)),
3424                           SvPV_nolen(datasv)));
3425     av_unshift(PL_rsfp_filters, 1);
3426     av_store(PL_rsfp_filters, 0, datasv) ;
3427     return(datasv);
3428 }
3429
3430
3431 /* Delete most recently added instance of this filter function. */
3432 void
3433 Perl_filter_del(pTHX_ filter_t funcp)
3434 {
3435     dVAR;
3436     SV *datasv;
3437
3438     PERL_ARGS_ASSERT_FILTER_DEL;
3439
3440 #ifdef DEBUGGING
3441     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
3442                           FPTR2DPTR(void*, funcp)));
3443 #endif
3444     if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
3445         return;
3446     /* if filter is on top of stack (usual case) just pop it off */
3447     datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
3448     if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
3449         IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
3450         IoANY(datasv) = (void *)NULL;
3451         sv_free(av_pop(PL_rsfp_filters));
3452
3453         return;
3454     }
3455     /* we need to search for the correct entry and clear it     */
3456     Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
3457 }
3458
3459
3460 /* Invoke the idxth filter function for the current rsfp.        */
3461 /* maxlen 0 = read one text line */
3462 I32
3463 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
3464 {
3465     dVAR;
3466     filter_t funcp;
3467     SV *datasv = NULL;
3468     /* This API is bad. It should have been using unsigned int for maxlen.
3469        Not sure if we want to change the API, but if not we should sanity
3470        check the value here.  */
3471     const unsigned int correct_length
3472         = maxlen < 0 ?
3473 #ifdef PERL_MICRO
3474         0x7FFFFFFF
3475 #else
3476         INT_MAX
3477 #endif
3478         : maxlen;
3479
3480     PERL_ARGS_ASSERT_FILTER_READ;
3481
3482     if (!PL_parser || !PL_rsfp_filters)
3483         return -1;
3484     if (idx > AvFILLp(PL_rsfp_filters)) {       /* Any more filters?    */
3485         /* Provide a default input filter to make life easy.    */
3486         /* Note that we append to the line. This is handy.      */
3487         DEBUG_P(PerlIO_printf(Perl_debug_log,
3488                               "filter_read %d: from rsfp\n", idx));
3489         if (correct_length) {
3490             /* Want a block */
3491             int len ;
3492             const int old_len = SvCUR(buf_sv);
3493
3494             /* ensure buf_sv is large enough */
3495             SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
3496             if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
3497                                    correct_length)) <= 0) {
3498                 if (PerlIO_error(PL_rsfp))
3499                     return -1;          /* error */
3500                 else
3501                     return 0 ;          /* end of file */
3502             }
3503             SvCUR_set(buf_sv, old_len + len) ;
3504             SvPVX(buf_sv)[old_len + len] = '\0';
3505         } else {
3506             /* Want a line */
3507             if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
3508                 if (PerlIO_error(PL_rsfp))
3509                     return -1;          /* error */
3510                 else
3511                     return 0 ;          /* end of file */
3512             }
3513         }
3514         return SvCUR(buf_sv);
3515     }
3516     /* Skip this filter slot if filter has been deleted */
3517     if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
3518         DEBUG_P(PerlIO_printf(Perl_debug_log,
3519                               "filter_read %d: skipped (filter deleted)\n",
3520                               idx));
3521         return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
3522     }
3523     /* Get function pointer hidden within datasv        */
3524     funcp = DPTR2FPTR(filter_t, IoANY(datasv));
3525     DEBUG_P(PerlIO_printf(Perl_debug_log,
3526                           "filter_read %d: via function %p (%s)\n",
3527                           idx, (void*)datasv, SvPV_nolen_const(datasv)));
3528     /* Call function. The function is expected to       */
3529     /* call "FILTER_READ(idx+1, buf_sv)" first.         */
3530     /* Return: <0:error, =0:eof, >0:not eof             */
3531     return (*funcp)(aTHX_ idx, buf_sv, correct_length);
3532 }
3533
3534 STATIC char *
3535 S_filter_gets(pTHX_ register SV *sv, STRLEN append)
3536 {
3537     dVAR;
3538
3539     PERL_ARGS_ASSERT_FILTER_GETS;
3540
3541 #ifdef PERL_CR_FILTER
3542     if (!PL_rsfp_filters) {
3543         filter_add(S_cr_textfilter,NULL);
3544     }
3545 #endif
3546     if (PL_rsfp_filters) {
3547         if (!append)
3548             SvCUR_set(sv, 0);   /* start with empty line        */
3549         if (FILTER_READ(0, sv, 0) > 0)
3550             return ( SvPVX(sv) ) ;
3551         else
3552             return NULL ;
3553     }
3554     else
3555         return (sv_gets(sv, PL_rsfp, append));
3556 }
3557
3558 STATIC HV *
3559 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
3560 {
3561     dVAR;
3562     GV *gv;
3563
3564     PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
3565
3566     if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
3567         return PL_curstash;
3568
3569     if (len > 2 &&
3570         (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
3571         (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
3572     {
3573         return GvHV(gv);                        /* Foo:: */
3574     }
3575
3576     /* use constant CLASS => 'MyClass' */
3577     gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
3578     if (gv && GvCV(gv)) {
3579         SV * const sv = cv_const_sv(GvCV(gv));
3580         if (sv)
3581             pkgname = SvPV_const(sv, len);
3582     }
3583
3584     return gv_stashpvn(pkgname, len, 0);
3585 }
3586
3587 /*
3588  * S_readpipe_override
3589  * Check whether readpipe() is overriden, and generates the appropriate
3590  * optree, provided sublex_start() is called afterwards.
3591  */
3592 STATIC void
3593 S_readpipe_override(pTHX)
3594 {
3595     GV **gvp;
3596     GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
3597     pl_yylval.ival = OP_BACKTICK;
3598     if ((gv_readpipe
3599                 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
3600             ||
3601             ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
3602              && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
3603              && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
3604     {
3605         PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
3606             append_elem(OP_LIST,
3607                 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
3608                 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
3609     }
3610 }
3611
3612 #ifdef PERL_MAD
3613  /*
3614  * Perl_madlex
3615  * The intent of this yylex wrapper is to minimize the changes to the
3616  * tokener when we aren't interested in collecting madprops.  It remains
3617  * to be seen how successful this strategy will be...
3618  */
3619
3620 int
3621 Perl_madlex(pTHX)
3622 {
3623     int optype;
3624     char *s = PL_bufptr;
3625
3626     /* make sure PL_thiswhite is initialized */
3627     PL_thiswhite = 0;
3628     PL_thismad = 0;
3629
3630     /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
3631     if (PL_pending_ident)
3632         return S_pending_ident(aTHX);
3633
3634     /* previous token ate up our whitespace? */
3635     if (!PL_lasttoke && PL_nextwhite) {
3636         PL_thiswhite = PL_nextwhite;
3637         PL_nextwhite = 0;
3638     }
3639
3640     /* isolate the token, and figure out where it is without whitespace */
3641     PL_realtokenstart = -1;
3642     PL_thistoken = 0;
3643     optype = yylex();
3644     s = PL_bufptr;
3645     assert(PL_curforce < 0);
3646
3647     if (!PL_thismad || PL_thismad->mad_key == '^') {    /* not forced already? */
3648         if (!PL_thistoken) {
3649             if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
3650                 PL_thistoken = newSVpvs("");
3651             else {
3652                 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
3653                 PL_thistoken = newSVpvn(tstart, s - tstart);
3654             }
3655         }
3656         if (PL_thismad) /* install head */
3657             CURMAD('X', PL_thistoken);
3658     }
3659
3660     /* last whitespace of a sublex? */
3661     if (optype == ')' && PL_endwhite) {
3662         CURMAD('X', PL_endwhite);
3663     }
3664
3665     if (!PL_thismad) {
3666
3667         /* if no whitespace and we're at EOF, bail.  Otherwise fake EOF below. */
3668         if (!PL_thiswhite && !PL_endwhite && !optype) {
3669             sv_free(PL_thistoken);
3670             PL_thistoken = 0;
3671             return 0;
3672         }
3673
3674         /* put off final whitespace till peg */
3675         if (optype == ';' && !PL_rsfp) {
3676             PL_nextwhite = PL_thiswhite;
3677             PL_thiswhite = 0;
3678         }
3679         else if (PL_thisopen) {
3680             CURMAD('q', PL_thisopen);
3681             if (PL_thistoken)
3682                 sv_free(PL_thistoken);
3683             PL_thistoken = 0;
3684         }
3685         else {
3686             /* Store actual token text as madprop X */
3687             CURMAD('X', PL_thistoken);
3688         }
3689
3690         if (PL_thiswhite) {
3691             /* add preceding whitespace as madprop _ */
3692             CURMAD('_', PL_thiswhite);
3693         }
3694
3695         if (PL_thisstuff) {
3696             /* add quoted material as madprop = */
3697             CURMAD('=', PL_thisstuff);
3698         }
3699
3700         if (PL_thisclose) {
3701             /* add terminating quote as madprop Q */
3702             CURMAD('Q', PL_thisclose);
3703         }
3704     }
3705
3706     /* special processing based on optype */
3707
3708     switch (optype) {
3709
3710     /* opval doesn't need a TOKEN since it can already store mp */
3711     case WORD:
3712     case METHOD:
3713     case FUNCMETH:
3714     case THING:
3715     case PMFUNC:
3716     case PRIVATEREF:
3717     case FUNC0SUB:
3718     case UNIOPSUB:
3719     case LSTOPSUB:
3720         if (pl_yylval.opval)
3721             append_madprops(PL_thismad, pl_yylval.opval, 0);
3722         PL_thismad = 0;
3723         return optype;
3724
3725     /* fake EOF */
3726     case 0:
3727         optype = PEG;
3728         if (PL_endwhite) {
3729             addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
3730             PL_endwhite = 0;
3731         }
3732         break;
3733
3734     case ']':
3735     case '}':
3736         if (PL_faketokens)
3737             break;
3738         /* remember any fake bracket that lexer is about to discard */
3739         if (PL_lex_brackets == 1 &&
3740             ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
3741         {
3742             s = PL_bufptr;
3743             while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3744                 s++;
3745             if (*s == '}') {
3746                 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
3747                 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3748                 PL_thiswhite = 0;
3749                 PL_bufptr = s - 1;
3750                 break;  /* don't bother looking for trailing comment */
3751             }
3752             else
3753                 s = PL_bufptr;
3754         }
3755         if (optype == ']')
3756             break;
3757         /* FALLTHROUGH */
3758
3759     /* attach a trailing comment to its statement instead of next token */
3760     case ';':
3761         if (PL_faketokens)
3762             break;
3763         if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
3764             s = PL_bufptr;
3765             while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3766                 s++;
3767             if (*s == '\n' || *s == '#') {
3768                 while (s < PL_bufend && *s != '\n')
3769                     s++;
3770                 if (s < PL_bufend)
3771                     s++;
3772                 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
3773                 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3774                 PL_thiswhite = 0;
3775                 PL_bufptr = s;
3776             }
3777         }
3778         break;
3779
3780     /* pval */
3781     case LABEL:
3782         break;
3783
3784     /* ival */
3785     default:
3786         break;
3787
3788     }
3789
3790     /* Create new token struct.  Note: opvals return early above. */
3791     pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
3792     PL_thismad = 0;
3793     return optype;
3794 }
3795 #endif
3796
3797 STATIC char *
3798 S_tokenize_use(pTHX_ int is_use, char *s) {
3799     dVAR;
3800
3801     PERL_ARGS_ASSERT_TOKENIZE_USE;
3802
3803     if (PL_expect != XSTATE)
3804         yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
3805                     is_use ? "use" : "no"));
3806     s = SKIPSPACE1(s);
3807     if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
3808         s = force_version(s, TRUE);
3809         if (*s == ';' || (s = SKIPSPACE1(s), *s == ';')) {
3810             start_force(PL_curforce);
3811             NEXTVAL_NEXTTOKE.opval = NULL;
3812             force_next(WORD);
3813         }
3814         else if (*s == 'v') {
3815             s = force_word(s,WORD,FALSE,TRUE,FALSE);
3816             s = force_version(s, FALSE);
3817         }
3818     }
3819     else {
3820         s = force_word(s,WORD,FALSE,TRUE,FALSE);
3821         s = force_version(s, FALSE);
3822     }
3823     pl_yylval.ival = is_use;
3824     return s;
3825 }
3826 #ifdef DEBUGGING
3827     static const char* const exp_name[] =
3828         { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
3829           "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
3830         };
3831 #endif
3832
3833 /*
3834   yylex
3835
3836   Works out what to call the token just pulled out of the input
3837   stream.  The yacc parser takes care of taking the ops we return and
3838   stitching them into a tree.
3839
3840   Returns:
3841     PRIVATEREF
3842
3843   Structure:
3844       if read an identifier
3845           if we're in a my declaration
3846               croak if they tried to say my($foo::bar)
3847               build the ops for a my() declaration
3848           if it's an access to a my() variable
3849               are we in a sort block?
3850                   croak if my($a); $a <=> $b
3851               build ops for access to a my() variable
3852           if in a dq string, and they've said @foo and we can't find @foo
3853               croak
3854           build ops for a bareword
3855       if we already built the token before, use it.
3856 */
3857
3858
3859 #ifdef __SC__
3860 #pragma segment Perl_yylex
3861 #endif
3862 int
3863 Perl_yylex(pTHX)
3864 {
3865     dVAR;
3866     register char *s = PL_bufptr;
3867     register char *d;
3868     STRLEN len;
3869     bool bof = FALSE;
3870
3871     /* orig_keyword, gvp, and gv are initialized here because
3872      * jump to the label just_a_word_zero can bypass their
3873      * initialization later. */
3874     I32 orig_keyword = 0;
3875     GV *gv = NULL;
3876     GV **gvp = NULL;
3877
3878     DEBUG_T( {
3879         SV* tmp = newSVpvs("");
3880         PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
3881             (IV)CopLINE(PL_curcop),
3882             lex_state_names[PL_lex_state],
3883             exp_name[PL_expect],
3884             pv_display(tmp, s, strlen(s), 0, 60));
3885         SvREFCNT_dec(tmp);
3886     } );
3887     /* check if there's an identifier for us to look at */
3888     if (PL_pending_ident)
3889         return REPORT(S_pending_ident(aTHX));
3890
3891     /* no identifier pending identification */
3892
3893     switch (PL_lex_state) {
3894 #ifdef COMMENTARY
3895     case LEX_NORMAL:            /* Some compilers will produce faster */
3896     case LEX_INTERPNORMAL:      /* code if we comment these out. */
3897         break;
3898 #endif
3899
3900     /* when we've already built the next token, just pull it out of the queue */
3901     case LEX_KNOWNEXT:
3902 #ifdef PERL_MAD
3903         PL_lasttoke--;
3904         pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
3905         if (PL_madskills) {
3906             PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
3907             PL_nexttoke[PL_lasttoke].next_mad = 0;
3908             if (PL_thismad && PL_thismad->mad_key == '_') {
3909                 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
3910                 PL_thismad->mad_val = 0;
3911                 mad_free(PL_thismad);
3912                 PL_thismad = 0;
3913             }
3914         }
3915         if (!PL_lasttoke) {
3916             PL_lex_state = PL_lex_defer;
3917             PL_expect = PL_lex_expect;
3918             PL_lex_defer = LEX_NORMAL;
3919             if (!PL_nexttoke[PL_lasttoke].next_type)
3920                 return yylex();
3921         }
3922 #else
3923         PL_nexttoke--;
3924         pl_yylval = PL_nextval[PL_nexttoke];
3925         if (!PL_nexttoke) {
3926             PL_lex_state = PL_lex_defer;
3927             PL_expect = PL_lex_expect;
3928             PL_lex_defer = LEX_NORMAL;
3929         }
3930 #endif
3931 #ifdef PERL_MAD
3932         /* FIXME - can these be merged?  */
3933         return(PL_nexttoke[PL_lasttoke].next_type);
3934 #else
3935         return REPORT(PL_nexttype[PL_nexttoke]);
3936 #endif
3937
3938     /* interpolated case modifiers like \L \U, including \Q and \E.
3939        when we get here, PL_bufptr is at the \
3940     */
3941     case LEX_INTERPCASEMOD:
3942 #ifdef DEBUGGING
3943         if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
3944             Perl_croak(aTHX_ "panic: INTERPCASEMOD");
3945 #endif
3946         /* handle \E or end of string */
3947         if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
3948             /* if at a \E */
3949             if (PL_lex_casemods) {
3950                 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3951                 PL_lex_casestack[PL_lex_casemods] = '\0';
3952
3953                 if (PL_bufptr != PL_bufend
3954                     && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3955                     PL_bufptr += 2;
3956                     PL_lex_state = LEX_INTERPCONCAT;
3957 #ifdef PERL_MAD
3958                     if (PL_madskills)
3959                         PL_thistoken = newSVpvs("\\E");
3960 #endif
3961                 }
3962                 return REPORT(')');
3963             }
3964 #ifdef PERL_MAD
3965             while (PL_bufptr != PL_bufend &&
3966               PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
3967                 if (!PL_thiswhite)
3968                     PL_thiswhite = newSVpvs("");
3969                 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
3970                 PL_bufptr += 2;
3971             }
3972 #else
3973             if (PL_bufptr != PL_bufend)
3974                 PL_bufptr += 2;
3975 #endif
3976             PL_lex_state = LEX_INTERPCONCAT;
3977             return yylex();
3978         }
3979         else {
3980             DEBUG_T({ PerlIO_printf(Perl_debug_log,
3981               "### Saw case modifier\n"); });
3982             s = PL_bufptr + 1;
3983             if (s[1] == '\\' && s[2] == 'E') {
3984 #ifdef PERL_MAD
3985                 if (!PL_thiswhite)
3986                     PL_thiswhite = newSVpvs("");
3987                 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
3988 #endif
3989                 PL_bufptr = s + 3;
3990                 PL_lex_state = LEX_INTERPCONCAT;
3991                 return yylex();
3992             }
3993             else {
3994                 I32 tmp;
3995                 if (!PL_madskills) /* when just compiling don't need correct */
3996                     if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
3997                         tmp = *s, *s = s[2], s[2] = (char)tmp;  /* misordered... */
3998                 if ((*s == 'L' || *s == 'U') &&
3999                     (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
4000                     PL_lex_casestack[--PL_lex_casemods] = '\0';
4001                     return REPORT(')');
4002                 }
4003                 if (PL_lex_casemods > 10)
4004                     Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4005                 PL_lex_casestack[PL_lex_casemods++] = *s;
4006                 PL_lex_casestack[PL_lex_casemods] = '\0';
4007                 PL_lex_state = LEX_INTERPCONCAT;
4008                 start_force(PL_curforce);
4009                 NEXTVAL_NEXTTOKE.ival = 0;
4010                 force_next('(');
4011                 start_force(PL_curforce);
4012                 if (*s == 'l')
4013                     NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4014                 else if (*s == 'u')
4015                     NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4016                 else if (*s == 'L')
4017                     NEXTVAL_NEXTTOKE.ival = OP_LC;
4018                 else if (*s == 'U')
4019                     NEXTVAL_NEXTTOKE.ival = OP_UC;
4020                 else if (*s == 'Q')
4021                     NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4022                 else
4023                     Perl_croak(aTHX_ "panic: yylex");
4024                 if (PL_madskills) {
4025                     SV* const tmpsv = newSVpvs("\\ ");
4026                     /* replace the space with the character we want to escape
4027                      */
4028                     SvPVX(tmpsv)[1] = *s;
4029                     curmad('_', tmpsv);
4030                 }
4031                 PL_bufptr = s + 1;
4032             }
4033             force_next(FUNC);
4034             if (PL_lex_starts) {
4035                 s = PL_bufptr;
4036                 PL_lex_starts = 0;
4037 #ifdef PERL_MAD
4038                 if (PL_madskills) {
4039                     if (PL_thistoken)
4040                         sv_free(PL_thistoken);
4041                     PL_thistoken = newSVpvs("");
4042                 }
4043 #endif
4044                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4045                 if (PL_lex_casemods == 1 && PL_lex_inpat)
4046                     OPERATOR(',');
4047                 else
4048                     Aop(OP_CONCAT);
4049             }
4050             else
4051                 return yylex();
4052         }
4053
4054     case LEX_INTERPPUSH:
4055         return REPORT(sublex_push());
4056
4057     case LEX_INTERPSTART:
4058         if (PL_bufptr == PL_bufend)
4059             return REPORT(sublex_done());
4060         DEBUG_T({ PerlIO_printf(Perl_debug_log,
4061               "### Interpolated variable\n"); });
4062         PL_expect = XTERM;
4063         PL_lex_dojoin = (*PL_bufptr == '@');
4064         PL_lex_state = LEX_INTERPNORMAL;
4065         if (PL_lex_dojoin) {
4066             start_force(PL_curforce);
4067             NEXTVAL_NEXTTOKE.ival = 0;
4068             force_next(',');
4069             start_force(PL_curforce);
4070             force_ident("\"", '$');
4071             start_force(PL_curforce);
4072             NEXTVAL_NEXTTOKE.ival = 0;
4073             force_next('$');
4074             start_force(PL_curforce);
4075             NEXTVAL_NEXTTOKE.ival = 0;
4076             force_next('(');
4077             start_force(PL_curforce);
4078             NEXTVAL_NEXTTOKE.ival = OP_JOIN;    /* emulate join($", ...) */
4079             force_next(FUNC);
4080         }
4081         if (PL_lex_starts++) {
4082             s = PL_bufptr;
4083 #ifdef PERL_MAD
4084             if (PL_madskills) {
4085                 if (PL_thistoken)
4086                     sv_free(PL_thistoken);
4087                 PL_thistoken = newSVpvs("");
4088             }
4089 #endif
4090             /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4091             if (!PL_lex_casemods && PL_lex_inpat)
4092                 OPERATOR(',');
4093             else
4094                 Aop(OP_CONCAT);
4095         }
4096         return yylex();
4097
4098     case LEX_INTERPENDMAYBE:
4099         if (intuit_more(PL_bufptr)) {
4100             PL_lex_state = LEX_INTERPNORMAL;    /* false alarm, more expr */
4101             break;
4102         }
4103         /* FALL THROUGH */
4104
4105     case LEX_INTERPEND:
4106         if (PL_lex_dojoin) {
4107             PL_lex_dojoin = FALSE;
4108             PL_lex_state = LEX_INTERPCONCAT;
4109 #ifdef PERL_MAD
4110             if (PL_madskills) {
4111                 if (PL_thistoken)
4112                     sv_free(PL_thistoken);
4113                 PL_thistoken = newSVpvs("");
4114             }
4115 #endif
4116             return REPORT(')');
4117         }
4118         if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
4119             && SvEVALED(PL_lex_repl))
4120         {
4121             if (PL_bufptr != PL_bufend)
4122                 Perl_croak(aTHX_ "Bad evalled substitution pattern");
4123             PL_lex_repl = NULL;
4124         }
4125         /* FALLTHROUGH */
4126     case LEX_INTERPCONCAT:
4127 #ifdef DEBUGGING
4128         if (PL_lex_brackets)
4129             Perl_croak(aTHX_ "panic: INTERPCONCAT");
4130 #endif
4131         if (PL_bufptr == PL_bufend)
4132             return REPORT(sublex_done());
4133
4134         if (SvIVX(PL_linestr) == '\'') {
4135             SV *sv = newSVsv(PL_linestr);
4136             if (!PL_lex_inpat)
4137                 sv = tokeq(sv);
4138             else if ( PL_hints & HINT_NEW_RE )
4139                 sv = new_constant(NULL, 0, "qr", sv, sv, "q", 1);
4140             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
4141             s = PL_bufend;
4142         }
4143         else {
4144             s = scan_const(PL_bufptr);
4145             if (*s == '\\')
4146                 PL_lex_state = LEX_INTERPCASEMOD;
4147             else
4148                 PL_lex_state = LEX_INTERPSTART;
4149         }
4150
4151         if (s != PL_bufptr) {
4152             start_force(PL_curforce);
4153             if (PL_madskills) {
4154                 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
4155             }
4156             NEXTVAL_NEXTTOKE = pl_yylval;
4157             PL_expect = XTERM;
4158             force_next(THING);
4159             if (PL_lex_starts++) {
4160 #ifdef PERL_MAD
4161                 if (PL_madskills) {
4162                     if (PL_thistoken)
4163                         sv_free(PL_thistoken);
4164                     PL_thistoken = newSVpvs("");
4165                 }
4166 #endif
4167                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4168                 if (!PL_lex_casemods && PL_lex_inpat)
4169                     OPERATOR(',');
4170                 else
4171                     Aop(OP_CONCAT);
4172             }
4173             else {
4174                 PL_bufptr = s;
4175                 return yylex();
4176             }
4177         }
4178
4179         return yylex();
4180     case LEX_FORMLINE:
4181         PL_lex_state = LEX_NORMAL;
4182         s = scan_formline(PL_bufptr);
4183         if (!PL_lex_formbrack)
4184             goto rightbracket;
4185         OPERATOR(';');
4186     }
4187
4188     s = PL_bufptr;
4189     PL_oldoldbufptr = PL_oldbufptr;
4190     PL_oldbufptr = s;
4191
4192   retry:
4193 #ifdef PERL_MAD
4194     if (PL_thistoken) {
4195         sv_free(PL_thistoken);
4196         PL_thistoken = 0;
4197     }
4198     PL_realtokenstart = s - SvPVX(PL_linestr);  /* assume but undo on ws */
4199 #endif
4200     switch (*s) {
4201     default:
4202         if (isIDFIRST_lazy_if(s,UTF))
4203             goto keylookup;
4204         {
4205         unsigned char c = *s;
4206         len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
4207         if (len > UNRECOGNIZED_PRECEDE_COUNT) {
4208             d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
4209         } else {
4210             d = PL_linestart;
4211         }
4212         *s = '\0';
4213         Perl_croak(aTHX_ "Unrecognized character \\x%02X; marked by <-- HERE after %s<-- HERE near column %d", c, d, (int) len + 1);
4214     }
4215     case 4:
4216     case 26:
4217         goto fake_eof;                  /* emulate EOF on ^D or ^Z */
4218     case 0:
4219 #ifdef PERL_MAD
4220         if (PL_madskills)
4221             PL_faketokens = 0;
4222 #endif
4223         if (!PL_rsfp) {
4224             PL_last_uni = 0;
4225             PL_last_lop = 0;
4226             if (PL_lex_brackets) {
4227                 yyerror((const char *)
4228                         (PL_lex_formbrack
4229                          ? "Format not terminated"
4230                          : "Missing right curly or square bracket"));
4231             }
4232             DEBUG_T( { PerlIO_printf(Perl_debug_log,
4233                         "### Tokener got EOF\n");
4234             } );
4235             TOKEN(0);
4236         }
4237         if (s++ < PL_bufend)
4238             goto retry;                 /* ignore stray nulls */
4239         PL_last_uni = 0;
4240         PL_last_lop = 0;
4241         if (!PL_in_eval && !PL_preambled) {
4242             PL_preambled = TRUE;
4243 #ifdef PERL_MAD
4244             if (PL_madskills)
4245                 PL_faketokens = 1;
4246 #endif
4247             if (PL_perldb) {
4248                 /* Generate a string of Perl code to load the debugger.
4249                  * If PERL5DB is set, it will return the contents of that,
4250                  * otherwise a compile-time require of perl5db.pl.  */
4251
4252                 const char * const pdb = PerlEnv_getenv("PERL5DB");
4253
4254                 if (pdb) {
4255                     sv_setpv(PL_linestr, pdb);
4256                     sv_catpvs(PL_linestr,";");
4257                 } else {
4258                     SETERRNO(0,SS_NORMAL);
4259                     sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
4260                 }
4261             } else
4262                 sv_setpvs(PL_linestr,"");
4263             if (PL_preambleav) {
4264                 SV **svp = AvARRAY(PL_preambleav);
4265                 SV **const end = svp + AvFILLp(PL_preambleav);
4266                 while(svp <= end) {
4267                     sv_catsv(PL_linestr, *svp);
4268                     ++svp;
4269                     sv_catpvs(PL_linestr, ";");
4270                 }
4271                 sv_free(MUTABLE_SV(PL_preambleav));
4272                 PL_preambleav = NULL;
4273             }
4274             if (PL_minus_E)
4275                 sv_catpvs(PL_linestr,
4276                           "use feature ':5." STRINGIFY(PERL_VERSION) "';");
4277             if (PL_minus_n || PL_minus_p) {
4278                 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
4279                 if (PL_minus_l)
4280                     sv_catpvs(PL_linestr,"chomp;");
4281                 if (PL_minus_a) {
4282                     if (PL_minus_F) {
4283                         if ((*PL_splitstr == '/' || *PL_splitstr == '\''
4284                              || *PL_splitstr == '"')
4285                               && strchr(PL_splitstr + 1, *PL_splitstr))
4286                             Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
4287                         else {
4288                             /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
4289                                bytes can be used as quoting characters.  :-) */
4290                             const char *splits = PL_splitstr;
4291                             sv_catpvs(PL_linestr, "our @F=split(q\0");
4292                             do {
4293                                 /* Need to \ \s  */
4294                                 if (*splits == '\\')
4295                                     sv_catpvn(PL_linestr, splits, 1);
4296                                 sv_catpvn(PL_linestr, splits, 1);
4297                             } while (*splits++);
4298                             /* This loop will embed the trailing NUL of
4299                                PL_linestr as the last thing it does before
4300                                terminating.  */
4301                             sv_catpvs(PL_linestr, ");");
4302                         }
4303                     }
4304                     else
4305                         sv_catpvs(PL_linestr,"our @F=split(' ');");
4306                 }
4307             }
4308             sv_catpvs(PL_linestr, "\n");
4309             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4310             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4311             PL_last_lop = PL_last_uni = NULL;
4312             if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
4313                 update_debugger_info(PL_linestr, NULL, 0);
4314             goto retry;
4315         }
4316         do {
4317             U32 fake_eof = 0;
4318             bof = PL_rsfp ? TRUE : FALSE;
4319             if (0) {
4320               fake_eof:
4321                 fake_eof = LEX_FAKE_EOF;
4322             }
4323             PL_bufptr = PL_bufend;
4324             if (!lex_next_chunk(fake_eof)) {
4325                 s = PL_bufptr;
4326                 TOKEN(';');     /* not infinite loop because rsfp is NULL now */
4327             }
4328 #ifdef PERL_MAD
4329             if (!PL_rsfp)
4330                 PL_realtokenstart = -1;
4331 #endif
4332             s = PL_bufptr;
4333             /* If it looks like the start of a BOM or raw UTF-16,
4334              * check if it in fact is. */
4335             if (bof &&
4336                      (*s == 0 ||
4337                       *(U8*)s == 0xEF ||
4338                       *(U8*)s >= 0xFE ||
4339                       s[1] == 0)) {
4340 #ifdef PERLIO_IS_STDIO
4341 #  ifdef __GNU_LIBRARY__
4342 #    if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
4343 #      define FTELL_FOR_PIPE_IS_BROKEN
4344 #    endif
4345 #  else
4346 #    ifdef __GLIBC__
4347 #      if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
4348 #        define FTELL_FOR_PIPE_IS_BROKEN
4349 #      endif
4350 #    endif
4351 #  endif
4352 #endif
4353                 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
4354                 if (bof) {
4355                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4356                     s = swallow_bom((U8*)s);
4357                 }
4358             }
4359             if (PL_doextract) {
4360                 /* Incest with pod. */
4361 #ifdef PERL_MAD
4362                 if (PL_madskills)
4363                     sv_catsv(PL_thiswhite, PL_linestr);
4364 #endif
4365                 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
4366                     sv_setpvs(PL_linestr, "");
4367                     PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4368                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4369                     PL_last_lop = PL_last_uni = NULL;
4370                     PL_doextract = FALSE;
4371                 }
4372             }
4373             incline(s);
4374         } while (PL_doextract);
4375         PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
4376         if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
4377             update_debugger_info(PL_linestr, NULL, 0);
4378         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4379         PL_last_lop = PL_last_uni = NULL;
4380         if (CopLINE(PL_curcop) == 1) {
4381             while (s < PL_bufend && isSPACE(*s))
4382                 s++;
4383             if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
4384                 s++;
4385 #ifdef PERL_MAD
4386             if (PL_madskills)
4387                 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
4388 #endif
4389             d = NULL;
4390             if (!PL_in_eval) {
4391                 if (*s == '#' && *(s+1) == '!')
4392                     d = s + 2;
4393 #ifdef ALTERNATE_SHEBANG
4394                 else {
4395                     static char const as[] = ALTERNATE_SHEBANG;
4396                     if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
4397                         d = s + (sizeof(as) - 1);
4398                 }
4399 #endif /* ALTERNATE_SHEBANG */
4400             }
4401             if (d) {
4402                 char *ipath;
4403                 char *ipathend;
4404
4405                 while (isSPACE(*d))
4406                     d++;
4407                 ipath = d;
4408                 while (*d && !isSPACE(*d))
4409                     d++;
4410                 ipathend = d;
4411
4412 #ifdef ARG_ZERO_IS_SCRIPT
4413                 if (ipathend > ipath) {
4414                     /*
4415                      * HP-UX (at least) sets argv[0] to the script name,
4416                      * which makes $^X incorrect.  And Digital UNIX and Linux,
4417                      * at least, set argv[0] to the basename of the Perl
4418                      * interpreter. So, having found "#!", we'll set it right.
4419                      */
4420                     SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
4421                                                     SVt_PV)); /* $^X */
4422                     assert(SvPOK(x) || SvGMAGICAL(x));
4423                     if (sv_eq(x, CopFILESV(PL_curcop))) {
4424                         sv_setpvn(x, ipath, ipathend - ipath);
4425                         SvSETMAGIC(x);
4426                     }
4427                     else {
4428                         STRLEN blen;
4429                         STRLEN llen;
4430                         const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
4431                         const char * const lstart = SvPV_const(x,llen);
4432                         if (llen < blen) {
4433                             bstart += blen - llen;
4434                             if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
4435                                 sv_setpvn(x, ipath, ipathend - ipath);
4436                                 SvSETMAGIC(x);
4437                             }
4438                         }
4439                     }
4440                     TAINT_NOT;  /* $^X is always tainted, but that's OK */
4441                 }
4442 #endif /* ARG_ZERO_IS_SCRIPT */
4443
4444                 /*
4445                  * Look for options.
4446                  */
4447                 d = instr(s,"perl -");
4448                 if (!d) {
4449                     d = instr(s,"perl");
4450 #if defined(DOSISH)
4451                     /* avoid getting into infinite loops when shebang
4452                      * line contains "Perl" rather than "perl" */
4453                     if (!d) {
4454                         for (d = ipathend-4; d >= ipath; --d) {
4455                             if ((*d == 'p' || *d == 'P')
4456                                 && !ibcmp(d, "perl", 4))
4457                             {
4458                                 break;
4459                             }
4460                         }
4461                         if (d < ipath)
4462                             d = NULL;
4463                     }
4464 #endif
4465                 }
4466 #ifdef ALTERNATE_SHEBANG
4467                 /*
4468                  * If the ALTERNATE_SHEBANG on this system starts with a
4469                  * character that can be part of a Perl expression, then if
4470                  * we see it but not "perl", we're probably looking at the
4471                  * start of Perl code, not a request to hand off to some
4472                  * other interpreter.  Similarly, if "perl" is there, but
4473                  * not in the first 'word' of the line, we assume the line
4474                  * contains the start of the Perl program.
4475                  */
4476                 if (d && *s != '#') {
4477                     const char *c = ipath;
4478                     while (*c && !strchr("; \t\r\n\f\v#", *c))
4479                         c++;
4480                     if (c < d)
4481                         d = NULL;       /* "perl" not in first word; ignore */
4482                     else
4483                         *s = '#';       /* Don't try to parse shebang line */
4484                 }
4485 #endif /* ALTERNATE_SHEBANG */
4486                 if (!d &&
4487                     *s == '#' &&
4488                     ipathend > ipath &&
4489                     !PL_minus_c &&
4490                     !instr(s,"indir") &&
4491                     instr(PL_origargv[0],"perl"))
4492                 {
4493                     dVAR;
4494                     char **newargv;
4495
4496                     *ipathend = '\0';
4497                     s = ipathend + 1;
4498                     while (s < PL_bufend && isSPACE(*s))
4499                         s++;
4500                     if (s < PL_bufend) {
4501                         Newx(newargv,PL_origargc+3,char*);
4502                         newargv[1] = s;
4503                         while (s < PL_bufend && !isSPACE(*s))
4504                             s++;
4505                         *s = '\0';
4506                         Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
4507                     }
4508                     else
4509                         newargv = PL_origargv;
4510                     newargv[0] = ipath;
4511                     PERL_FPU_PRE_EXEC
4512                     PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
4513                     PERL_FPU_POST_EXEC
4514                     Perl_croak(aTHX_ "Can't exec %s", ipath);
4515                 }
4516                 if (d) {
4517                     while (*d && !isSPACE(*d))
4518                         d++;
4519                     while (SPACE_OR_TAB(*d))
4520                         d++;
4521
4522                     if (*d++ == '-') {
4523                         const bool switches_done = PL_doswitches;
4524                         const U32 oldpdb = PL_perldb;
4525                         const bool oldn = PL_minus_n;
4526                         const bool oldp = PL_minus_p;
4527                         const char *d1 = d;
4528
4529                         do {
4530                             bool baduni = FALSE;
4531                             if (*d1 == 'C') {
4532                                 const char *d2 = d1 + 1;
4533                                 if (parse_unicode_opts((const char **)&d2)
4534                                     != PL_unicode)
4535                                     baduni = TRUE;
4536                             }
4537                             if (baduni || *d1 == 'M' || *d1 == 'm') {
4538                                 const char * const m = d1;
4539                                 while (*d1 && !isSPACE(*d1))
4540                                     d1++;
4541                                 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
4542                                       (int)(d1 - m), m);
4543                             }
4544                             d1 = moreswitches(d1);
4545                         } while (d1);
4546                         if (PL_doswitches && !switches_done) {
4547                             int argc = PL_origargc;
4548                             char **argv = PL_origargv;
4549                             do {
4550                                 argc--,argv++;
4551                             } while (argc && argv[0][0] == '-' && argv[0][1]);
4552                             init_argv_symbols(argc,argv);
4553                         }
4554                         if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
4555                             ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
4556                               /* if we have already added "LINE: while (<>) {",
4557                                  we must not do it again */
4558                         {
4559                             sv_setpvs(PL_linestr, "");
4560                             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4561                             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4562                             PL_last_lop = PL_last_uni = NULL;
4563                             PL_preambled = FALSE;
4564                             if (PERLDB_LINE || PERLDB_SAVESRC)
4565                                 (void)gv_fetchfile(PL_origfilename);
4566                             goto retry;
4567                         }
4568                     }
4569                 }
4570             }
4571         }
4572         if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4573             PL_bufptr = s;
4574             PL_lex_state = LEX_FORMLINE;
4575             return yylex();
4576         }
4577         goto retry;
4578     case '\r':
4579 #ifdef PERL_STRICT_CR
4580         Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4581         Perl_croak(aTHX_
4582       "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
4583 #endif
4584     case ' ': case '\t': case '\f': case 013:
4585 #ifdef PERL_MAD
4586         PL_realtokenstart = -1;
4587         if (!PL_thiswhite)
4588             PL_thiswhite = newSVpvs("");
4589         sv_catpvn(PL_thiswhite, s, 1);
4590 #endif
4591         s++;
4592         goto retry;
4593     case '#':
4594     case '\n':
4595 #ifdef PERL_MAD
4596         PL_realtokenstart = -1;
4597         if (PL_madskills)
4598             PL_faketokens = 0;
4599 #endif
4600         if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
4601             if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
4602                 /* handle eval qq[#line 1 "foo"\n ...] */
4603                 CopLINE_dec(PL_curcop);
4604                 incline(s);
4605             }
4606             if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
4607                 s = SKIPSPACE0(s);
4608                 if (!PL_in_eval || PL_rsfp)
4609                     incline(s);
4610             }
4611             else {
4612                 d = s;
4613                 while (d < PL_bufend && *d != '\n')
4614                     d++;
4615                 if (d < PL_bufend)
4616                     d++;
4617                 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
4618                   Perl_croak(aTHX_ "panic: input overflow");
4619 #ifdef PERL_MAD
4620                 if (PL_madskills)
4621                     PL_thiswhite = newSVpvn(s, d - s);
4622 #endif
4623                 s = d;
4624                 incline(s);
4625             }
4626             if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4627                 PL_bufptr = s;
4628                 PL_lex_state = LEX_FORMLINE;
4629                 return yylex();
4630             }
4631         }
4632         else {
4633 #ifdef PERL_MAD
4634             if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
4635                 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
4636                     PL_faketokens = 0;
4637                     s = SKIPSPACE0(s);
4638                     TOKEN(PEG); /* make sure any #! line is accessible */
4639                 }
4640                 s = SKIPSPACE0(s);
4641             }
4642             else {
4643 /*              if (PL_madskills && PL_lex_formbrack) { */
4644                     d = s;
4645                     while (d < PL_bufend && *d != '\n')
4646                         d++;
4647                     if (d < PL_bufend)
4648                         d++;
4649                     else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
4650                       Perl_croak(aTHX_ "panic: input overflow");
4651                     if (PL_madskills && CopLINE(PL_curcop) >= 1) {
4652                         if (!PL_thiswhite)
4653                             PL_thiswhite = newSVpvs("");
4654                         if (CopLINE(PL_curcop) == 1) {
4655                             sv_setpvs(PL_thiswhite, "");
4656                             PL_faketokens = 0;
4657                         }
4658                         sv_catpvn(PL_thiswhite, s, d - s);
4659                     }
4660                     s = d;
4661 /*              }
4662                 *s = '\0';
4663                 PL_bufend = s; */
4664             }
4665 #else
4666             *s = '\0';
4667             PL_bufend = s;
4668 #endif
4669         }
4670         goto retry;
4671     case '-':
4672         if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
4673             I32 ftst = 0;
4674             char tmp;
4675
4676             s++;
4677             PL_bufptr = s;
4678             tmp = *s++;
4679
4680             while (s < PL_bufend && SPACE_OR_TAB(*s))
4681                 s++;
4682
4683             if (strnEQ(s,"=>",2)) {
4684                 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
4685                 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
4686                 OPERATOR('-');          /* unary minus */
4687             }
4688             PL_last_uni = PL_oldbufptr;
4689             switch (tmp) {
4690             case 'r': ftst = OP_FTEREAD;        break;
4691             case 'w': ftst = OP_FTEWRITE;       break;
4692             case 'x': ftst = OP_FTEEXEC;        break;
4693             case 'o': ftst = OP_FTEOWNED;       break;
4694             case 'R': ftst = OP_FTRREAD;        break;
4695             case 'W': ftst = OP_FTRWRITE;       break;
4696             case 'X': ftst = OP_FTREXEC;        break;
4697             case 'O': ftst = OP_FTROWNED;       break;
4698             case 'e': ftst = OP_FTIS;           break;
4699             case 'z': ftst = OP_FTZERO;         break;
4700             case 's': ftst = OP_FTSIZE;         break;
4701             case 'f': ftst = OP_FTFILE;         break;
4702             case 'd': ftst = OP_FTDIR;          break;
4703             case 'l': ftst = OP_FTLINK;         break;
4704             case 'p': ftst = OP_FTPIPE;         break;
4705             case 'S': ftst = OP_FTSOCK;         break;
4706             case 'u': ftst = OP_FTSUID;         break;
4707             case 'g': ftst = OP_FTSGID;         break;
4708             case 'k': ftst = OP_FTSVTX;         break;
4709             case 'b': ftst = OP_FTBLK;          break;
4710             case 'c': ftst = OP_FTCHR;          break;
4711             case 't': ftst = OP_FTTTY;          break;
4712             case 'T': ftst = OP_FTTEXT;         break;
4713             case 'B': ftst = OP_FTBINARY;       break;
4714             case 'M': case 'A': case 'C':
4715                 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
4716                 switch (tmp) {
4717                 case 'M': ftst = OP_FTMTIME;    break;
4718                 case 'A': ftst = OP_FTATIME;    break;
4719                 case 'C': ftst = OP_FTCTIME;    break;
4720                 default:                        break;
4721                 }
4722                 break;
4723             default:
4724                 break;
4725             }
4726             if (ftst) {
4727                 PL_last_lop_op = (OPCODE)ftst;
4728                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
4729                         "### Saw file test %c\n", (int)tmp);
4730                 } );
4731                 FTST(ftst);
4732             }
4733             else {
4734                 /* Assume it was a minus followed by a one-letter named
4735                  * subroutine call (or a -bareword), then. */
4736                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
4737                         "### '-%c' looked like a file test but was not\n",
4738                         (int) tmp);
4739                 } );
4740                 s = --PL_bufptr;
4741             }
4742         }
4743         {
4744             const char tmp = *s++;
4745             if (*s == tmp) {
4746                 s++;
4747                 if (PL_expect == XOPERATOR)
4748                     TERM(POSTDEC);
4749                 else
4750                     OPERATOR(PREDEC);
4751             }
4752             else if (*s == '>') {
4753                 s++;
4754                 s = SKIPSPACE1(s);
4755                 if (isIDFIRST_lazy_if(s,UTF)) {
4756                     s = force_word(s,METHOD,FALSE,TRUE,FALSE);
4757                     TOKEN(ARROW);
4758                 }
4759                 else if (*s == '$')
4760                     OPERATOR(ARROW);
4761                 else
4762                     TERM(ARROW);
4763             }
4764             if (PL_expect == XOPERATOR)
4765                 Aop(OP_SUBTRACT);
4766             else {
4767                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
4768                     check_uni();
4769                 OPERATOR('-');          /* unary minus */
4770             }
4771         }
4772
4773     case '+':
4774         {
4775             const char tmp = *s++;
4776             if (*s == tmp) {
4777                 s++;
4778                 if (PL_expect == XOPERATOR)
4779                     TERM(POSTINC);
4780                 else
4781                     OPERATOR(PREINC);
4782             }
4783             if (PL_expect == XOPERATOR)
4784                 Aop(OP_ADD);
4785             else {
4786                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
4787                     check_uni();
4788                 OPERATOR('+');
4789             }
4790         }
4791
4792     case '*':
4793         if (PL_expect != XOPERATOR) {
4794             s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
4795             PL_expect = XOPERATOR;
4796             force_ident(PL_tokenbuf, '*');
4797             if (!*PL_tokenbuf)
4798                 PREREF('*');
4799             TERM('*');
4800         }
4801         s++;
4802         if (*s == '*') {
4803             s++;
4804             PWop(OP_POW);
4805         }
4806         Mop(OP_MULTIPLY);
4807
4808     case '%':
4809         if (PL_expect == XOPERATOR) {
4810             ++s;
4811             Mop(OP_MODULO);
4812         }
4813         PL_tokenbuf[0] = '%';
4814         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
4815                 sizeof PL_tokenbuf - 1, FALSE);
4816         if (!PL_tokenbuf[1]) {
4817             PREREF('%');
4818         }
4819         PL_pending_ident = '%';
4820         TERM('%');
4821
4822     case '^':
4823         s++;
4824         BOop(OP_BIT_XOR);
4825     case '[':
4826         PL_lex_brackets++;
4827         {
4828             const char tmp = *s++;
4829             OPERATOR(tmp);
4830         }
4831     case '~':
4832         if (s[1] == '~'
4833             && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
4834         {
4835             s += 2;
4836             Eop(OP_SMARTMATCH);
4837         }
4838     case ',':
4839         {
4840             const char tmp = *s++;
4841             OPERATOR(tmp);
4842         }
4843     case ':':
4844         if (s[1] == ':') {
4845             len = 0;
4846             goto just_a_word_zero_gv;
4847         }
4848         s++;
4849         switch (PL_expect) {
4850             OP *attrs;
4851 #ifdef PERL_MAD
4852             I32 stuffstart;
4853 #endif
4854         case XOPERATOR:
4855             if (!PL_in_my || PL_lex_state != LEX_NORMAL)
4856                 break;
4857             PL_bufptr = s;      /* update in case we back off */
4858             if (*s == '=') {
4859                 deprecate(":= for an empty attribute list");
4860             }
4861             goto grabattrs;
4862         case XATTRBLOCK:
4863             PL_expect = XBLOCK;
4864             goto grabattrs;
4865         case XATTRTERM:
4866             PL_expect = XTERMBLOCK;
4867          grabattrs:
4868 #ifdef PERL_MAD
4869             stuffstart = s - SvPVX(PL_linestr) - 1;
4870 #endif
4871             s = PEEKSPACE(s);
4872             attrs = NULL;
4873             while (isIDFIRST_lazy_if(s,UTF)) {
4874                 I32 tmp;
4875                 SV *sv;
4876                 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
4877                 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
4878                     if (tmp < 0) tmp = -tmp;
4879                     switch (tmp) {
4880                     case KEY_or:
4881                     case KEY_and:
4882                     case KEY_for:
4883                     case KEY_foreach:
4884                     case KEY_unless:
4885                     case KEY_if:
4886                     case KEY_while:
4887                     case KEY_until:
4888                         goto got_attrs;
4889                     default:
4890                         break;
4891                     }
4892                 }
4893                 sv = newSVpvn(s, len);
4894                 if (*d == '(') {
4895                     d = scan_str(d,TRUE,TRUE);
4896                     if (!d) {
4897                         /* MUST advance bufptr here to avoid bogus
4898                            "at end of line" context messages from yyerror().
4899                          */
4900                         PL_bufptr = s + len;
4901                         yyerror("Unterminated attribute parameter in attribute list");
4902                         if (attrs)
4903                             op_free(attrs);
4904                         sv_free(sv);
4905                         return REPORT(0);       /* EOF indicator */
4906                     }
4907                 }
4908                 if (PL_lex_stuff) {
4909                     sv_catsv(sv, PL_lex_stuff);
4910                     attrs = append_elem(OP_LIST, attrs,
4911                                         newSVOP(OP_CONST, 0, sv));
4912                     SvREFCNT_dec(PL_lex_stuff);
4913                     PL_lex_stuff = NULL;
4914                 }
4915                 else {
4916                     if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
4917                         sv_free(sv);
4918                         if (PL_in_my == KEY_our) {
4919                             deprecate(":unique");
4920                         }
4921                         else
4922                             Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
4923                     }
4924
4925                     /* NOTE: any CV attrs applied here need to be part of
4926                        the CVf_BUILTIN_ATTRS define in cv.h! */
4927                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
4928                         sv_free(sv);
4929                         CvLVALUE_on(PL_compcv);
4930                     }
4931                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
4932                         sv_free(sv);
4933                         deprecate(":locked");
4934                     }
4935                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
4936                         sv_free(sv);
4937                         CvMETHOD_on(PL_compcv);
4938                     }
4939                     /* After we've set the flags, it could be argued that
4940                        we don't need to do the attributes.pm-based setting
4941                        process, and shouldn't bother appending recognized
4942                        flags.  To experiment with that, uncomment the
4943                        following "else".  (Note that's already been
4944                        uncommented.  That keeps the above-applied built-in
4945                        attributes from being intercepted (and possibly
4946                        rejected) by a package's attribute routines, but is
4947                        justified by the performance win for the common case
4948                        of applying only built-in attributes.) */
4949                     else
4950                         attrs = append_elem(OP_LIST, attrs,
4951                                             newSVOP(OP_CONST, 0,
4952                                                     sv));
4953                 }
4954                 s = PEEKSPACE(d);
4955                 if (*s == ':' && s[1] != ':')
4956                     s = PEEKSPACE(s+1);
4957                 else if (s == d)
4958                     break;      /* require real whitespace or :'s */
4959                 /* XXX losing whitespace on sequential attributes here */
4960             }
4961             {
4962                 const char tmp
4963                     = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
4964                 if (*s != ';' && *s != '}' && *s != tmp
4965                     && (tmp != '=' || *s != ')')) {
4966                     const char q = ((*s == '\'') ? '"' : '\'');
4967                     /* If here for an expression, and parsed no attrs, back
4968                        off. */
4969                     if (tmp == '=' && !attrs) {
4970                         s = PL_bufptr;
4971                         break;
4972                     }
4973                     /* MUST advance bufptr here to avoid bogus "at end of line"
4974                        context messages from yyerror().
4975                     */
4976                     PL_bufptr = s;
4977                     yyerror( (const char *)
4978                              (*s
4979                               ? Perl_form(aTHX_ "Invalid separator character "
4980                                           "%c%c%c in attribute list", q, *s, q)
4981                               : "Unterminated attribute list" ) );
4982                     if (attrs)
4983                         op_free(attrs);
4984                     OPERATOR(':');
4985                 }
4986             }
4987         got_attrs:
4988             if (attrs) {
4989                 start_force(PL_curforce);
4990                 NEXTVAL_NEXTTOKE.opval = attrs;
4991                 CURMAD('_', PL_nextwhite);
4992                 force_next(THING);
4993             }
4994 #ifdef PERL_MAD
4995             if (PL_madskills) {
4996                 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
4997                                      (s - SvPVX(PL_linestr)) - stuffstart);
4998             }
4999 #endif
5000             TOKEN(COLONATTR);
5001         }
5002         OPERATOR(':');
5003     case '(':
5004         s++;
5005         if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
5006             PL_oldbufptr = PL_oldoldbufptr;             /* allow print(STDOUT 123) */
5007         else
5008             PL_expect = XTERM;
5009         s = SKIPSPACE1(s);
5010         TOKEN('(');
5011     case ';':
5012         CLINE;
5013         {
5014             const char tmp = *s++;
5015             OPERATOR(tmp);
5016         }
5017     case ')':
5018         {
5019             const char tmp = *s++;
5020             s = SKIPSPACE1(s);
5021             if (*s == '{')
5022                 PREBLOCK(tmp);
5023             TERM(tmp);
5024         }
5025     case ']':
5026         s++;
5027         if (PL_lex_brackets <= 0)
5028             yyerror("Unmatched right square bracket");
5029         else
5030             --PL_lex_brackets;
5031         if (PL_lex_state == LEX_INTERPNORMAL) {
5032             if (PL_lex_brackets == 0) {
5033                 if (*s == '-' && s[1] == '>')
5034                     PL_lex_state = LEX_INTERPENDMAYBE;
5035                 else if (*s != '[' && *s != '{')
5036                     PL_lex_state = LEX_INTERPEND;
5037             }
5038         }
5039         TERM(']');
5040     case '{':
5041       leftbracket:
5042         s++;
5043         if (PL_lex_brackets > 100) {
5044             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5045         }
5046         switch (PL_expect) {
5047         case XTERM:
5048             if (PL_lex_formbrack) {
5049                 s--;
5050                 PRETERMBLOCK(DO);
5051             }
5052             if (PL_oldoldbufptr == PL_last_lop)
5053                 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5054             else
5055                 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5056             OPERATOR(HASHBRACK);
5057         case XOPERATOR:
5058             while (s < PL_bufend && SPACE_OR_TAB(*s))
5059                 s++;
5060             d = s;
5061             PL_tokenbuf[0] = '\0';
5062             if (d < PL_bufend && *d == '-') {
5063                 PL_tokenbuf[0] = '-';
5064                 d++;
5065                 while (d < PL_bufend && SPACE_OR_TAB(*d))
5066                     d++;
5067             }
5068             if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
5069                 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
5070                               FALSE, &len);
5071                 while (d < PL_bufend && SPACE_OR_TAB(*d))
5072                     d++;
5073                 if (*d == '}') {
5074                     const char minus = (PL_tokenbuf[0] == '-');
5075                     s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
5076                     if (minus)
5077                         force_next('-');
5078                 }
5079             }
5080             /* FALL THROUGH */
5081         case XATTRBLOCK:
5082         case XBLOCK:
5083             PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
5084             PL_expect = XSTATE;
5085             break;
5086         case XATTRTERM:
5087         case XTERMBLOCK:
5088             PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5089             PL_expect = XSTATE;
5090             break;
5091         default: {
5092                 const char *t;
5093                 if (PL_oldoldbufptr == PL_last_lop)
5094                     PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5095                 else
5096                     PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5097                 s = SKIPSPACE1(s);
5098                 if (*s == '}') {
5099                     if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5100                         PL_expect = XTERM;
5101                         /* This hack is to get the ${} in the message. */
5102                         PL_bufptr = s+1;
5103                         yyerror("syntax error");
5104                         break;
5105                     }
5106                     OPERATOR(HASHBRACK);
5107                 }
5108                 /* This hack serves to disambiguate a pair of curlies
5109                  * as being a block or an anon hash.  Normally, expectation
5110                  * determines that, but in cases where we're not in a
5111                  * position to expect anything in particular (like inside
5112                  * eval"") we have to resolve the ambiguity.  This code
5113                  * covers the case where the first term in the curlies is a
5114                  * quoted string.  Most other cases need to be explicitly
5115                  * disambiguated by prepending a "+" before the opening
5116                  * curly in order to force resolution as an anon hash.
5117                  *
5118                  * XXX should probably propagate the outer expectation
5119                  * into eval"" to rely less on this hack, but that could
5120                  * potentially break current behavior of eval"".
5121                  * GSAR 97-07-21
5122                  */
5123                 t = s;
5124                 if (*s == '\'' || *s == '"' || *s == '`') {
5125                     /* common case: get past first string, handling escapes */
5126                     for (t++; t < PL_bufend && *t != *s;)
5127                         if (*t++ == '\\' && (*t == '\\' || *t == *s))
5128                             t++;
5129                     t++;
5130                 }
5131                 else if (*s == 'q') {
5132                     if (++t < PL_bufend
5133                         && (!isALNUM(*t)
5134                             || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
5135                                 && !isALNUM(*t))))
5136                     {
5137                         /* skip q//-like construct */
5138                         const char *tmps;
5139                         char open, close, term;
5140                         I32 brackets = 1;
5141
5142                         while (t < PL_bufend && isSPACE(*t))
5143                             t++;
5144                         /* check for q => */
5145                         if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5146                             OPERATOR(HASHBRACK);
5147                         }
5148                         term = *t;
5149                         open = term;
5150                         if (term && (tmps = strchr("([{< )]}> )]}>",term)))
5151                             term = tmps[5];
5152                         close = term;
5153                         if (open == close)
5154                             for (t++; t < PL_bufend; t++) {
5155                                 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
5156                                     t++;
5157                                 else if (*t == open)
5158                                     break;
5159                             }
5160                         else {
5161                             for (t++; t < PL_bufend; t++) {
5162                                 if (*t == '\\' && t+1 < PL_bufend)
5163                                     t++;
5164                                 else if (*t == close && --brackets <= 0)
5165                                     break;
5166                                 else if (*t == open)
5167                                     brackets++;
5168                             }
5169                         }
5170                         t++;
5171                     }
5172                     else
5173                         /* skip plain q word */
5174                         while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5175                              t += UTF8SKIP(t);
5176                 }
5177                 else if (isALNUM_lazy_if(t,UTF)) {
5178                     t += UTF8SKIP(t);
5179                     while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5180                          t += UTF8SKIP(t);
5181                 }
5182                 while (t < PL_bufend && isSPACE(*t))
5183                     t++;
5184                 /* if comma follows first term, call it an anon hash */
5185                 /* XXX it could be a comma expression with loop modifiers */
5186                 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
5187                                    || (*t == '=' && t[1] == '>')))
5188                     OPERATOR(HASHBRACK);
5189                 if (PL_expect == XREF)
5190                     PL_expect = XTERM;
5191                 else {
5192                     PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
5193                     PL_expect = XSTATE;
5194                 }
5195             }
5196             break;
5197         }
5198         pl_yylval.ival = CopLINE(PL_curcop);
5199         if (isSPACE(*s) || *s == '#')
5200             PL_copline = NOLINE;   /* invalidate current command line number */
5201         TOKEN('{');
5202     case '}':
5203       rightbracket:
5204         s++;
5205         if (PL_lex_brackets <= 0)
5206             yyerror("Unmatched right curly bracket");
5207         else
5208             PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
5209         if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
5210             PL_lex_formbrack = 0;
5211         if (PL_lex_state == LEX_INTERPNORMAL) {
5212             if (PL_lex_brackets == 0) {
5213                 if (PL_expect & XFAKEBRACK) {
5214                     PL_expect &= XENUMMASK;
5215                     PL_lex_state = LEX_INTERPEND;
5216                     PL_bufptr = s;
5217 #if 0
5218                     if (PL_madskills) {
5219                         if (!PL_thiswhite)
5220                             PL_thiswhite = newSVpvs("");
5221                         sv_catpvs(PL_thiswhite,"}");
5222                     }
5223 #endif
5224                     return yylex();     /* ignore fake brackets */
5225                 }
5226                 if (*s == '-' && s[1] == '>')
5227                     PL_lex_state = LEX_INTERPENDMAYBE;
5228                 else if (*s != '[' && *s != '{')
5229                     PL_lex_state = LEX_INTERPEND;
5230             }
5231         }
5232         if (PL_expect & XFAKEBRACK) {
5233             PL_expect &= XENUMMASK;
5234             PL_bufptr = s;
5235             return yylex();             /* ignore fake brackets */
5236         }
5237         start_force(PL_curforce);
5238         if (PL_madskills) {
5239             curmad('X', newSVpvn(s-1,1));
5240             CURMAD('_', PL_thiswhite);
5241         }
5242         force_next('}');
5243 #ifdef PERL_MAD
5244         if (!PL_thistoken)
5245             PL_thistoken = newSVpvs("");
5246 #endif
5247         TOKEN(';');
5248     case '&':
5249         s++;
5250         if (*s++ == '&')
5251             AOPERATOR(ANDAND);
5252         s--;
5253         if (PL_expect == XOPERATOR) {
5254             if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
5255                 && isIDFIRST_lazy_if(s,UTF))
5256             {
5257                 CopLINE_dec(PL_curcop);
5258                 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
5259                 CopLINE_inc(PL_curcop);
5260             }
5261             BAop(OP_BIT_AND);
5262         }
5263
5264         s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5265         if (*PL_tokenbuf) {
5266             PL_expect = XOPERATOR;
5267             force_ident(PL_tokenbuf, '&');
5268         }
5269         else
5270             PREREF('&');
5271         pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
5272         TERM('&');
5273
5274     case '|':
5275         s++;
5276         if (*s++ == '|')
5277             AOPERATOR(OROR);
5278         s--;
5279         BOop(OP_BIT_OR);
5280     case '=':
5281         s++;
5282         {
5283             const char tmp = *s++;
5284             if (tmp == '=')
5285                 Eop(OP_EQ);
5286             if (tmp == '>')
5287                 OPERATOR(',');
5288             if (tmp == '~')
5289                 PMop(OP_MATCH);
5290             if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
5291                 && strchr("+-*/%.^&|<",tmp))
5292                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5293                             "Reversed %c= operator",(int)tmp);
5294             s--;
5295             if (PL_expect == XSTATE && isALPHA(tmp) &&
5296                 (s == PL_linestart+1 || s[-2] == '\n') )
5297                 {
5298                     if (PL_in_eval && !PL_rsfp) {
5299                         d = PL_bufend;
5300                         while (s < d) {
5301                             if (*s++ == '\n') {
5302                                 incline(s);
5303                                 if (strnEQ(s,"=cut",4)) {
5304                                     s = strchr(s,'\n');
5305                                     if (s)
5306                                         s++;
5307                                     else
5308                                         s = d;
5309                                     incline(s);
5310                                     goto retry;
5311                                 }
5312                             }
5313                         }
5314                         goto retry;
5315                     }
5316 #ifdef PERL_MAD
5317                     if (PL_madskills) {
5318                         if (!PL_thiswhite)
5319                             PL_thiswhite = newSVpvs("");
5320                         sv_catpvn(PL_thiswhite, PL_linestart,
5321                                   PL_bufend - PL_linestart);
5322                     }
5323 #endif
5324                     s = PL_bufend;
5325                     PL_doextract = TRUE;
5326                     goto retry;
5327                 }
5328         }
5329         if (PL_lex_brackets < PL_lex_formbrack) {
5330             const char *t = s;
5331 #ifdef PERL_STRICT_CR
5332             while (SPACE_OR_TAB(*t))
5333 #else
5334             while (SPACE_OR_TAB(*t) || *t == '\r')
5335 #endif
5336                 t++;
5337             if (*t == '\n' || *t == '#') {
5338                 s--;
5339                 PL_expect = XBLOCK;
5340                 goto leftbracket;
5341             }
5342         }
5343         pl_yylval.ival = 0;
5344         OPERATOR(ASSIGNOP);
5345     case '!':
5346         s++;
5347         {
5348             const char tmp = *s++;
5349             if (tmp == '=') {
5350                 /* was this !=~ where !~ was meant?
5351                  * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
5352
5353                 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
5354                     const char *t = s+1;
5355
5356                     while (t < PL_bufend && isSPACE(*t))
5357                         ++t;
5358
5359                     if (*t == '/' || *t == '?' ||
5360                         ((*t == 'm' || *t == 's' || *t == 'y')
5361                          && !isALNUM(t[1])) ||
5362                         (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
5363                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5364                                     "!=~ should be !~");
5365                 }
5366                 Eop(OP_NE);
5367             }
5368             if (tmp == '~')
5369                 PMop(OP_NOT);
5370         }
5371         s--;
5372         OPERATOR('!');
5373     case '<':
5374         if (PL_expect != XOPERATOR) {
5375             if (s[1] != '<' && !strchr(s,'>'))
5376                 check_uni();
5377             if (s[1] == '<')
5378                 s = scan_heredoc(s);
5379             else
5380                 s = scan_inputsymbol(s);
5381             TERM(sublex_start());
5382         }
5383         s++;
5384         {
5385             char tmp = *s++;
5386             if (tmp == '<')
5387                 SHop(OP_LEFT_SHIFT);
5388             if (tmp == '=') {
5389                 tmp = *s++;
5390                 if (tmp == '>')
5391                     Eop(OP_NCMP);
5392                 s--;
5393                 Rop(OP_LE);
5394             }
5395         }
5396         s--;
5397         Rop(OP_LT);
5398     case '>':
5399         s++;
5400         {
5401             const char tmp = *s++;
5402             if (tmp == '>')
5403                 SHop(OP_RIGHT_SHIFT);
5404             else if (tmp == '=')
5405                 Rop(OP_GE);
5406         }
5407         s--;
5408         Rop(OP_GT);
5409
5410     case '$':
5411         CLINE;
5412
5413         if (PL_expect == XOPERATOR) {
5414             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5415                 return deprecate_commaless_var_list();
5416             }
5417         }
5418
5419         if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
5420             PL_tokenbuf[0] = '@';
5421             s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
5422                            sizeof PL_tokenbuf - 1, FALSE);
5423             if (PL_expect == XOPERATOR)
5424                 no_op("Array length", s);
5425             if (!PL_tokenbuf[1])
5426                 PREREF(DOLSHARP);
5427             PL_expect = XOPERATOR;
5428             PL_pending_ident = '#';
5429             TOKEN(DOLSHARP);
5430         }
5431
5432         PL_tokenbuf[0] = '$';
5433         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5434                        sizeof PL_tokenbuf - 1, FALSE);
5435         if (PL_expect == XOPERATOR)
5436             no_op("Scalar", s);
5437         if (!PL_tokenbuf[1]) {
5438             if (s == PL_bufend)
5439                 yyerror("Final $ should be \\$ or $name");
5440             PREREF('$');
5441         }
5442
5443         /* This kludge not intended to be bulletproof. */
5444         if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
5445             pl_yylval.opval = newSVOP(OP_CONST, 0,
5446                                    newSViv(CopARYBASE_get(&PL_compiling)));
5447             pl_yylval.opval->op_private = OPpCONST_ARYBASE;
5448             TERM(THING);
5449         }
5450
5451         d = s;
5452         {
5453             const char tmp = *s;
5454             if (PL_lex_state == LEX_NORMAL)
5455                 s = SKIPSPACE1(s);
5456
5457             if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
5458                 && intuit_more(s)) {
5459                 if (*s == '[') {
5460                     PL_tokenbuf[0] = '@';
5461                     if (ckWARN(WARN_SYNTAX)) {
5462                         char *t = s+1;
5463
5464                         while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
5465                             t++;
5466                         if (*t++ == ',') {
5467                             PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
5468                             while (t < PL_bufend && *t != ']')
5469                                 t++;
5470                             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5471                                         "Multidimensional syntax %.*s not supported",
5472                                     (int)((t - PL_bufptr) + 1), PL_bufptr);
5473                         }
5474                     }
5475                 }
5476                 else if (*s == '{') {
5477                     char *t;
5478                     PL_tokenbuf[0] = '%';
5479                     if (strEQ(PL_tokenbuf+1, "SIG")  && ckWARN(WARN_SYNTAX)
5480                         && (t = strchr(s, '}')) && (t = strchr(t, '=')))
5481                         {
5482                             char tmpbuf[sizeof PL_tokenbuf];
5483                             do {
5484                                 t++;
5485                             } while (isSPACE(*t));
5486                             if (isIDFIRST_lazy_if(t,UTF)) {
5487                                 STRLEN len;
5488                                 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
5489                                               &len);
5490                                 while (isSPACE(*t))
5491                                     t++;
5492                                 if (*t == ';' && get_cvn_flags(tmpbuf, len, 0))
5493                                     Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5494                                                 "You need to quote \"%s\"",
5495                                                 tmpbuf);
5496                             }
5497                         }
5498                 }
5499             }
5500
5501             PL_expect = XOPERATOR;
5502             if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
5503                 const bool islop = (PL_last_lop == PL_oldoldbufptr);
5504                 if (!islop || PL_last_lop_op == OP_GREPSTART)
5505                     PL_expect = XOPERATOR;
5506                 else if (strchr("$@\"'`q", *s))
5507                     PL_expect = XTERM;          /* e.g. print $fh "foo" */
5508                 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
5509                     PL_expect = XTERM;          /* e.g. print $fh &sub */
5510                 else if (isIDFIRST_lazy_if(s,UTF)) {
5511                     char tmpbuf[sizeof PL_tokenbuf];
5512                     int t2;
5513                     scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5514                     if ((t2 = keyword(tmpbuf, len, 0))) {
5515                         /* binary operators exclude handle interpretations */
5516                         switch (t2) {
5517                         case -KEY_x:
5518                         case -KEY_eq:
5519                         case -KEY_ne:
5520                         case -KEY_gt:
5521                         case -KEY_lt:
5522                         case -KEY_ge:
5523                         case -KEY_le:
5524                         case -KEY_cmp:
5525                             break;
5526                         default:
5527                             PL_expect = XTERM;  /* e.g. print $fh length() */
5528                             break;
5529                         }
5530                     }
5531                     else {
5532                         PL_expect = XTERM;      /* e.g. print $fh subr() */
5533                     }
5534                 }
5535                 else if (isDIGIT(*s))
5536                     PL_expect = XTERM;          /* e.g. print $fh 3 */
5537                 else if (*s == '.' && isDIGIT(s[1]))
5538                     PL_expect = XTERM;          /* e.g. print $fh .3 */
5539                 else if ((*s == '?' || *s == '-' || *s == '+')
5540                          && !isSPACE(s[1]) && s[1] != '=')
5541                     PL_expect = XTERM;          /* e.g. print $fh -1 */
5542                 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
5543                          && s[1] != '/')
5544                     PL_expect = XTERM;          /* e.g. print $fh /.../
5545                                                    XXX except DORDOR operator
5546                                                 */
5547                 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
5548                          && s[2] != '=')
5549                     PL_expect = XTERM;          /* print $fh <<"EOF" */
5550             }
5551         }
5552         PL_pending_ident = '$';
5553         TOKEN('$');
5554
5555     case '@':
5556         if (PL_expect == XOPERATOR)
5557             no_op("Array", s);
5558         PL_tokenbuf[0] = '@';
5559         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
5560         if (!PL_tokenbuf[1]) {
5561             PREREF('@');
5562         }
5563         if (PL_lex_state == LEX_NORMAL)
5564             s = SKIPSPACE1(s);
5565         if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
5566             if (*s == '{')
5567                 PL_tokenbuf[0] = '%';
5568
5569             /* Warn about @ where they meant $. */
5570             if (*s == '[' || *s == '{') {
5571                 if (ckWARN(WARN_SYNTAX)) {
5572                     const char *t = s + 1;
5573                     while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
5574                         t++;
5575                     if (*t == '}' || *t == ']') {
5576                         t++;
5577                         PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
5578                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5579                             "Scalar value %.*s better written as $%.*s",
5580                             (int)(t-PL_bufptr), PL_bufptr,
5581                             (int)(t-PL_bufptr-1), PL_bufptr+1);
5582                     }
5583                 }
5584             }
5585         }
5586         PL_pending_ident = '@';
5587         TERM('@');
5588
5589      case '/':                  /* may be division, defined-or, or pattern */
5590         if (PL_expect == XTERMORDORDOR && s[1] == '/') {
5591             s += 2;
5592             AOPERATOR(DORDOR);
5593         }
5594      case '?':                  /* may either be conditional or pattern */
5595         if (PL_expect == XOPERATOR) {
5596              char tmp = *s++;
5597              if(tmp == '?') {
5598                 OPERATOR('?');
5599              }
5600              else {
5601                  tmp = *s++;
5602                  if(tmp == '/') {
5603                      /* A // operator. */
5604                     AOPERATOR(DORDOR);
5605                  }
5606                  else {
5607                      s--;
5608                      Mop(OP_DIVIDE);
5609                  }
5610              }
5611          }
5612          else {
5613              /* Disable warning on "study /blah/" */
5614              if (PL_oldoldbufptr == PL_last_uni
5615               && (*PL_last_uni != 's' || s - PL_last_uni < 5
5616                   || memNE(PL_last_uni, "study", 5)
5617                   || isALNUM_lazy_if(PL_last_uni+5,UTF)
5618               ))
5619                  check_uni();
5620              s = scan_pat(s,OP_MATCH);
5621              TERM(sublex_start());
5622          }
5623
5624     case '.':
5625         if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
5626 #ifdef PERL_STRICT_CR
5627             && s[1] == '\n'
5628 #else
5629             && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
5630 #endif
5631             && (s == PL_linestart || s[-1] == '\n') )
5632         {
5633             PL_lex_formbrack = 0;
5634             PL_expect = XSTATE;
5635             goto rightbracket;
5636         }
5637         if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
5638             s += 3;
5639             OPERATOR(YADAYADA);
5640         }
5641         if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
5642             char tmp = *s++;
5643             if (*s == tmp) {
5644                 s++;
5645                 if (*s == tmp) {
5646                     s++;
5647                     pl_yylval.ival = OPf_SPECIAL;
5648                 }
5649                 else
5650                     pl_yylval.ival = 0;
5651                 OPERATOR(DOTDOT);
5652             }
5653             if (PL_expect != XOPERATOR)
5654                 check_uni();
5655             Aop(OP_CONCAT);
5656         }
5657         /* FALL THROUGH */
5658     case '0': case '1': case '2': case '3': case '4':
5659     case '5': case '6': case '7': case '8': case '9':
5660         s = scan_num(s, &pl_yylval);
5661         DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
5662         if (PL_expect == XOPERATOR)
5663             no_op("Number",s);
5664         TERM(THING);
5665
5666     case '\'':
5667         s = scan_str(s,!!PL_madskills,FALSE);
5668         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
5669         if (PL_expect == XOPERATOR) {
5670             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5671                 return deprecate_commaless_var_list();
5672             }
5673             else
5674                 no_op("String",s);
5675         }
5676         if (!s)
5677             missingterm(NULL);
5678         pl_yylval.ival = OP_CONST;
5679         TERM(sublex_start());
5680
5681     case '"':
5682         s = scan_str(s,!!PL_madskills,FALSE);
5683         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
5684         if (PL_expect == XOPERATOR) {
5685             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5686                 return deprecate_commaless_var_list();
5687             }
5688             else
5689                 no_op("String",s);
5690         }
5691         if (!s)
5692             missingterm(NULL);
5693         pl_yylval.ival = OP_CONST;
5694         /* FIXME. I think that this can be const if char *d is replaced by
5695            more localised variables.  */
5696         for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
5697             if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
5698                 pl_yylval.ival = OP_STRINGIFY;
5699                 break;
5700             }
5701         }
5702         TERM(sublex_start());
5703
5704     case '`':
5705         s = scan_str(s,!!PL_madskills,FALSE);
5706         DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
5707         if (PL_expect == XOPERATOR)
5708             no_op("Backticks",s);
5709         if (!s)
5710             missingterm(NULL);
5711         readpipe_override();
5712         TERM(sublex_start());
5713
5714     case '\\':
5715         s++;
5716         if (PL_lex_inwhat && isDIGIT(*s))
5717             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
5718                            *s, *s);
5719         if (PL_expect == XOPERATOR)
5720             no_op("Backslash",s);
5721         OPERATOR(REFGEN);
5722
5723     case 'v':
5724         if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
5725             char *start = s + 2;
5726             while (isDIGIT(*start) || *start == '_')
5727                 start++;
5728             if (*start == '.' && isDIGIT(start[1])) {
5729                 s = scan_num(s, &pl_yylval);
5730                 TERM(THING);
5731             }
5732             /* avoid v123abc() or $h{v1}, allow C<print v10;> */
5733             else if (!isALPHA(*start) && (PL_expect == XTERM
5734                         || PL_expect == XREF || PL_expect == XSTATE
5735                         || PL_expect == XTERMORDORDOR)) {
5736                 GV *const gv = gv_fetchpvn_flags(s, start - s, 0, SVt_PVCV);
5737                 if (!gv) {
5738                     s = scan_num(s, &pl_yylval);
5739                     TERM(THING);
5740                 }
5741             }
5742         }
5743         goto keylookup;
5744     case 'x':
5745         if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
5746             s++;
5747             Mop(OP_REPEAT);
5748         }
5749         goto keylookup;
5750
5751     case '_':
5752     case 'a': case 'A':
5753     case 'b': case 'B':
5754     case 'c': case 'C':
5755     case 'd': case 'D':
5756     case 'e': case 'E':
5757     case 'f': case 'F':
5758     case 'g': case 'G':
5759     case 'h': case 'H':
5760     case 'i': case 'I':
5761     case 'j': case 'J':
5762     case 'k': case 'K':
5763     case 'l': case 'L':
5764     case 'm': case 'M':
5765     case 'n': case 'N':
5766     case 'o': case 'O':
5767     case 'p': case 'P':
5768     case 'q': case 'Q':
5769     case 'r': case 'R':
5770     case 's': case 'S':
5771     case 't': case 'T':
5772     case 'u': case 'U':
5773               case 'V':
5774     case 'w': case 'W':
5775               case 'X':
5776     case 'y': case 'Y':
5777     case 'z': case 'Z':
5778
5779       keylookup: {
5780         bool anydelim;
5781         I32 tmp;
5782
5783         orig_keyword = 0;
5784         gv = NULL;
5785         gvp = NULL;
5786
5787         PL_bufptr = s;
5788         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5789
5790         /* Some keywords can be followed by any delimiter, including ':' */
5791         anydelim = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) ||
5792                (len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
5793                              (PL_tokenbuf[0] == 'q' &&
5794                               strchr("qwxr", PL_tokenbuf[1])))));
5795
5796         /* x::* is just a word, unless x is "CORE" */
5797         if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
5798             goto just_a_word;
5799
5800         d = s;
5801         while (d < PL_bufend && isSPACE(*d))
5802                 d++;    /* no comments skipped here, or s### is misparsed */
5803
5804         /* Is this a word before a => operator? */
5805         if (*d == '=' && d[1] == '>') {
5806             CLINE;
5807             pl_yylval.opval
5808                 = (OP*)newSVOP(OP_CONST, 0,
5809                                S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
5810             pl_yylval.opval->op_private = OPpCONST_BARE;
5811             TERM(WORD);
5812         }
5813
5814         /* Check for plugged-in keyword */
5815         {
5816             OP *o;
5817             int result;
5818             char *saved_bufptr = PL_bufptr;
5819             PL_bufptr = s;
5820             result = CALL_FPTR(PL_keyword_plugin)(aTHX_ PL_tokenbuf, len, &o);
5821             s = PL_bufptr;
5822             if (result == KEYWORD_PLUGIN_DECLINE) {
5823                 /* not a plugged-in keyword */
5824                 PL_bufptr = saved_bufptr;
5825             } else if (result == KEYWORD_PLUGIN_STMT) {
5826                 pl_yylval.opval = o;
5827                 CLINE;
5828                 PL_expect = XSTATE;
5829                 return REPORT(PLUGSTMT);
5830             } else if (result == KEYWORD_PLUGIN_EXPR) {
5831                 pl_yylval.opval = o;
5832                 CLINE;
5833                 PL_expect = XOPERATOR;
5834                 return REPORT(PLUGEXPR);
5835             } else {
5836                 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
5837                                         PL_tokenbuf);
5838             }
5839         }
5840
5841         /* Check for built-in keyword */
5842         tmp = keyword(PL_tokenbuf, len, 0);
5843
5844         /* Is this a label? */
5845         if (!anydelim && PL_expect == XSTATE
5846               && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
5847             if (tmp)
5848                 Perl_croak(aTHX_ "Can't use keyword '%s' as a label", PL_tokenbuf);
5849             s = d + 1;
5850             pl_yylval.pval = CopLABEL_alloc(PL_tokenbuf);
5851             CLINE;
5852             TOKEN(LABEL);
5853         }
5854
5855         if (tmp < 0) {                  /* second-class keyword? */
5856             GV *ogv = NULL;     /* override (winner) */
5857             GV *hgv = NULL;     /* hidden (loser) */
5858             if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
5859                 CV *cv;
5860                 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVCV)) &&
5861                     (cv = GvCVu(gv)))
5862                 {
5863                     if (GvIMPORTED_CV(gv))
5864                         ogv = gv;
5865                     else if (! CvMETHOD(cv))
5866                         hgv = gv;
5867                 }
5868                 if (!ogv &&
5869                     (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
5870                     (gv = *gvp) && isGV_with_GP(gv) &&
5871                     GvCVu(gv) && GvIMPORTED_CV(gv))
5872                 {
5873                     ogv = gv;
5874                 }
5875             }
5876             if (ogv) {
5877                 orig_keyword = tmp;
5878                 tmp = 0;                /* overridden by import or by GLOBAL */
5879             }
5880             else if (gv && !gvp
5881                      && -tmp==KEY_lock  /* XXX generalizable kludge */
5882                      && GvCVu(gv))
5883             {
5884                 tmp = 0;                /* any sub overrides "weak" keyword */
5885             }
5886             else {                      /* no override */
5887                 tmp = -tmp;
5888                 if (tmp == KEY_dump) {
5889                     Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
5890                                    "dump() better written as CORE::dump()");
5891                 }
5892                 gv = NULL;
5893                 gvp = 0;
5894                 if (hgv && tmp != KEY_x && tmp != KEY_CORE)     /* never ambiguous */
5895                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5896                                    "Ambiguous call resolved as CORE::%s(), %s",
5897                                    GvENAME(hgv), "qualify as such or use &");
5898             }
5899         }
5900
5901       reserved_word:
5902         switch (tmp) {
5903
5904         default:                        /* not a keyword */
5905             /* Trade off - by using this evil construction we can pull the
5906                variable gv into the block labelled keylookup. If not, then
5907                we have to give it function scope so that the goto from the
5908                earlier ':' case doesn't bypass the initialisation.  */
5909             if (0) {
5910             just_a_word_zero_gv:
5911                 gv = NULL;
5912                 gvp = NULL;
5913                 orig_keyword = 0;
5914             }
5915           just_a_word: {
5916                 SV *sv;
5917                 int pkgname = 0;
5918                 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
5919                 OP *rv2cv_op;
5920                 CV *cv;
5921 #ifdef PERL_MAD
5922                 SV *nextPL_nextwhite = 0;
5923 #endif
5924
5925
5926                 /* Get the rest if it looks like a package qualifier */
5927
5928                 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
5929                     STRLEN morelen;
5930                     s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
5931                                   TRUE, &morelen);
5932                     if (!morelen)
5933                         Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
5934                                 *s == '\'' ? "'" : "::");
5935                     len += morelen;
5936                     pkgname = 1;
5937                 }
5938
5939                 if (PL_expect == XOPERATOR) {
5940                     if (PL_bufptr == PL_linestart) {
5941                         CopLINE_dec(PL_curcop);
5942                         Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
5943                         CopLINE_inc(PL_curcop);
5944                     }
5945                     else
5946                         no_op("Bareword",s);
5947                 }
5948
5949                 /* Look for a subroutine with this name in current package,
5950                    unless name is "Foo::", in which case Foo is a bearword
5951                    (and a package name). */
5952
5953                 if (len > 2 && !PL_madskills &&
5954                     PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
5955                 {
5956                     if (ckWARN(WARN_BAREWORD)
5957                         && ! gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVHV))
5958                         Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
5959                             "Bareword \"%s\" refers to nonexistent package",
5960                              PL_tokenbuf);
5961                     len -= 2;
5962                     PL_tokenbuf[len] = '\0';
5963                     gv = NULL;
5964                     gvp = 0;
5965                 }
5966                 else {
5967                     if (!gv) {
5968                         /* Mustn't actually add anything to a symbol table.
5969                            But also don't want to "initialise" any placeholder
5970                            constants that might already be there into full
5971                            blown PVGVs with attached PVCV.  */
5972                         gv = gv_fetchpvn_flags(PL_tokenbuf, len,
5973                                                GV_NOADD_NOINIT, SVt_PVCV);
5974                     }
5975                     len = 0;
5976                 }
5977
5978                 /* if we saw a global override before, get the right name */
5979
5980                 if (gvp) {
5981                     sv = newSVpvs("CORE::GLOBAL::");
5982                     sv_catpv(sv,PL_tokenbuf);
5983                 }
5984                 else {
5985                     /* If len is 0, newSVpv does strlen(), which is correct.
5986                        If len is non-zero, then it will be the true length,
5987                        and so the scalar will be created correctly.  */
5988                     sv = newSVpv(PL_tokenbuf,len);
5989                 }
5990 #ifdef PERL_MAD
5991                 if (PL_madskills && !PL_thistoken) {
5992                     char *start = SvPVX(PL_linestr) + PL_realtokenstart;
5993                     PL_thistoken = newSVpvn(start,s - start);
5994                     PL_realtokenstart = s - SvPVX(PL_linestr);
5995                 }
5996 #endif
5997
5998                 /* Presume this is going to be a bareword of some sort. */
5999
6000                 CLINE;
6001                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
6002                 pl_yylval.opval->op_private = OPpCONST_BARE;
6003                 /* UTF-8 package name? */
6004                 if (UTF && !IN_BYTES &&
6005                     is_utf8_string((U8*)SvPVX_const(sv), SvCUR(sv)))
6006                     SvUTF8_on(sv);
6007
6008                 /* And if "Foo::", then that's what it certainly is. */
6009
6010                 if (len)
6011                     goto safe_bareword;
6012
6013                 cv = NULL;
6014                 {
6015                     OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc(sv));
6016                     const_op->op_private = OPpCONST_BARE;
6017                     rv2cv_op = newCVREF(0, const_op);
6018                 }
6019                 if (rv2cv_op->op_type == OP_RV2CV &&
6020                         (rv2cv_op->op_flags & OPf_KIDS)) {
6021                     OP *rv_op = cUNOPx(rv2cv_op)->op_first;
6022                     switch (rv_op->op_type) {
6023                         case OP_CONST: {
6024                             SV *sv = cSVOPx_sv(rv_op);
6025                             if (SvROK(sv) && SvTYPE(SvRV(sv)) == SVt_PVCV)
6026                                 cv = (CV*)SvRV(sv);
6027                         } break;
6028                         case OP_GV: {
6029                             GV *gv = cGVOPx_gv(rv_op);
6030                             CV *maybe_cv = GvCVu(gv);
6031                             if (maybe_cv && SvTYPE((SV*)maybe_cv) == SVt_PVCV)
6032                                 cv = maybe_cv;
6033                         } break;
6034                     }
6035                 }
6036
6037                 /* See if it's the indirect object for a list operator. */
6038
6039                 if (PL_oldoldbufptr &&
6040                     PL_oldoldbufptr < PL_bufptr &&
6041                     (PL_oldoldbufptr == PL_last_lop
6042                      || PL_oldoldbufptr == PL_last_uni) &&
6043                     /* NO SKIPSPACE BEFORE HERE! */
6044                     (PL_expect == XREF ||
6045                      ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
6046                 {
6047                     bool immediate_paren = *s == '(';
6048
6049                     /* (Now we can afford to cross potential line boundary.) */
6050                     s = SKIPSPACE2(s,nextPL_nextwhite);
6051 #ifdef PERL_MAD
6052                     PL_nextwhite = nextPL_nextwhite;    /* assume no & deception */
6053 #endif
6054
6055                     /* Two barewords in a row may indicate method call. */
6056
6057                     if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
6058                         (tmp = intuit_method(s, gv, cv))) {
6059                         op_free(rv2cv_op);
6060                         return REPORT(tmp);
6061                     }
6062
6063                     /* If not a declared subroutine, it's an indirect object. */
6064                     /* (But it's an indir obj regardless for sort.) */
6065                     /* Also, if "_" follows a filetest operator, it's a bareword */
6066
6067                     if (
6068                         ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
6069                          (!cv &&
6070                         (PL_last_lop_op != OP_MAPSTART &&
6071                          PL_last_lop_op != OP_GREPSTART))))
6072                        || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
6073                             && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
6074                        )
6075                     {
6076                         PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
6077                         goto bareword;
6078                     }
6079                 }
6080
6081                 PL_expect = XOPERATOR;
6082 #ifdef PERL_MAD
6083                 if (isSPACE(*s))
6084                     s = SKIPSPACE2(s,nextPL_nextwhite);
6085                 PL_nextwhite = nextPL_nextwhite;
6086 #else
6087                 s = skipspace(s);
6088 #endif
6089
6090                 /* Is this a word before a => operator? */
6091                 if (*s == '=' && s[1] == '>' && !pkgname) {
6092                     op_free(rv2cv_op);
6093                     CLINE;
6094                     sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
6095                     if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
6096                       SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
6097                     TERM(WORD);
6098                 }
6099
6100                 /* If followed by a paren, it's certainly a subroutine. */
6101                 if (*s == '(') {
6102                     CLINE;
6103                     if (cv) {
6104                         d = s + 1;
6105                         while (SPACE_OR_TAB(*d))
6106                             d++;
6107                         if (*d == ')' && (sv = cv_const_sv(cv))) {
6108                             s = d + 1;
6109                             goto its_constant;
6110                         }
6111                     }
6112 #ifdef PERL_MAD
6113                     if (PL_madskills) {
6114                         PL_nextwhite = PL_thiswhite;
6115                         PL_thiswhite = 0;
6116                     }
6117                     start_force(PL_curforce);
6118 #endif
6119                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6120                     PL_expect = XOPERATOR;
6121 #ifdef PERL_MAD
6122                     if (PL_madskills) {
6123                         PL_nextwhite = nextPL_nextwhite;
6124                         curmad('X', PL_thistoken);
6125                         PL_thistoken = newSVpvs("");
6126                     }
6127 #endif
6128                     op_free(rv2cv_op);
6129                     force_next(WORD);
6130                     pl_yylval.ival = 0;
6131                     TOKEN('&');
6132                 }
6133
6134                 /* If followed by var or block, call it a method (unless sub) */
6135
6136                 if ((*s == '$' || *s == '{') && !cv) {
6137                     op_free(rv2cv_op);
6138                     PL_last_lop = PL_oldbufptr;
6139                     PL_last_lop_op = OP_METHOD;
6140                     PREBLOCK(METHOD);
6141                 }
6142
6143                 /* If followed by a bareword, see if it looks like indir obj. */
6144
6145                 if (!orig_keyword
6146                         && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
6147                         && (tmp = intuit_method(s, gv, cv))) {
6148                     op_free(rv2cv_op);
6149                     return REPORT(tmp);
6150                 }
6151
6152                 /* Not a method, so call it a subroutine (if defined) */
6153
6154                 if (cv) {
6155                     if (lastchar == '-')
6156                         Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6157                                          "Ambiguous use of -%s resolved as -&%s()",
6158                                          PL_tokenbuf, PL_tokenbuf);
6159                     /* Check for a constant sub */
6160                     if ((sv = cv_const_sv(cv))) {
6161                   its_constant:
6162                         op_free(rv2cv_op);
6163                         SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
6164                         ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
6165                         pl_yylval.opval->op_private = 0;
6166                         TOKEN(WORD);
6167                     }
6168
6169                     op_free(pl_yylval.opval);
6170                     pl_yylval.opval = rv2cv_op;
6171                     pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
6172                     PL_last_lop = PL_oldbufptr;
6173                     PL_last_lop_op = OP_ENTERSUB;
6174                     /* Is there a prototype? */
6175                     if (
6176 #ifdef PERL_MAD
6177                         cv &&
6178 #endif
6179                         SvPOK(cv))
6180                     {
6181                         STRLEN protolen;
6182                         const char *proto = SvPV_const(MUTABLE_SV(cv), protolen);
6183                         if (!protolen)
6184                             TERM(FUNC0SUB);
6185                         if ((*proto == '$' || *proto == '_') && proto[1] == '\0')
6186                             OPERATOR(UNIOPSUB);
6187                         while (*proto == ';')
6188                             proto++;
6189                         if (*proto == '&' && *s == '{') {
6190                             if (PL_curstash)
6191                                 sv_setpvs(PL_subname, "__ANON__");
6192                             else
6193                                 sv_setpvs(PL_subname, "__ANON__::__ANON__");
6194                             PREBLOCK(LSTOPSUB);
6195                         }
6196                     }
6197 #ifdef PERL_MAD
6198                     {
6199                         if (PL_madskills) {
6200                             PL_nextwhite = PL_thiswhite;
6201                             PL_thiswhite = 0;
6202                         }
6203                         start_force(PL_curforce);
6204                         NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6205                         PL_expect = XTERM;
6206                         if (PL_madskills) {
6207                             PL_nextwhite = nextPL_nextwhite;
6208                             curmad('X', PL_thistoken);
6209                             PL_thistoken = newSVpvs("");
6210                         }
6211                         force_next(WORD);
6212                         TOKEN(NOAMP);
6213                     }
6214                 }
6215
6216                 /* Guess harder when madskills require "best effort". */
6217                 if (PL_madskills && (!gv || !GvCVu(gv))) {
6218                     int probable_sub = 0;
6219                     if (strchr("\"'`$@%0123456789!*+{[<", *s))
6220                         probable_sub = 1;
6221                     else if (isALPHA(*s)) {
6222                         char tmpbuf[1024];
6223                         STRLEN tmplen;
6224                         d = s;
6225                         d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
6226                         if (!keyword(tmpbuf, tmplen, 0))
6227                             probable_sub = 1;
6228                         else {
6229                             while (d < PL_bufend && isSPACE(*d))
6230                                 d++;
6231                             if (*d == '=' && d[1] == '>')
6232                                 probable_sub = 1;
6233                         }
6234                     }
6235                     if (probable_sub) {
6236                         gv = gv_fetchpv(PL_tokenbuf, GV_ADD, SVt_PVCV);
6237                         op_free(pl_yylval.opval);
6238                         pl_yylval.opval = rv2cv_op;
6239                         pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
6240                         PL_last_lop = PL_oldbufptr;
6241                         PL_last_lop_op = OP_ENTERSUB;
6242                         PL_nextwhite = PL_thiswhite;
6243                         PL_thiswhite = 0;
6244                         start_force(PL_curforce);
6245                         NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6246                         PL_expect = XTERM;
6247                         PL_nextwhite = nextPL_nextwhite;
6248                         curmad('X', PL_thistoken);
6249                         PL_thistoken = newSVpvs("");
6250                         force_next(WORD);
6251                         TOKEN(NOAMP);
6252                     }
6253 #else
6254                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6255                     PL_expect = XTERM;
6256                     force_next(WORD);
6257                     TOKEN(NOAMP);
6258 #endif
6259                 }
6260
6261                 /* Call it a bare word */
6262
6263                 if (PL_hints & HINT_STRICT_SUBS)
6264                     pl_yylval.opval->op_private |= OPpCONST_STRICT;
6265                 else {
6266                 bareword:
6267                     /* after "print" and similar functions (corresponding to
6268                      * "F? L" in opcode.pl), whatever wasn't already parsed as
6269                      * a filehandle should be subject to "strict subs".
6270                      * Likewise for the optional indirect-object argument to system
6271                      * or exec, which can't be a bareword */
6272                     if ((PL_last_lop_op == OP_PRINT
6273                             || PL_last_lop_op == OP_PRTF
6274                             || PL_last_lop_op == OP_SAY
6275                             || PL_last_lop_op == OP_SYSTEM
6276                             || PL_last_lop_op == OP_EXEC)
6277                             && (PL_hints & HINT_STRICT_SUBS))
6278                         pl_yylval.opval->op_private |= OPpCONST_STRICT;
6279                     if (lastchar != '-') {
6280                         if (ckWARN(WARN_RESERVED)) {
6281                             d = PL_tokenbuf;
6282                             while (isLOWER(*d))
6283                                 d++;
6284                             if (!*d && !gv_stashpv(PL_tokenbuf, 0))
6285                                 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
6286                                        PL_tokenbuf);
6287                         }
6288                     }
6289                 }
6290                 op_free(rv2cv_op);
6291
6292             safe_bareword:
6293                 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
6294                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6295                                      "Operator or semicolon missing before %c%s",
6296                                      lastchar, PL_tokenbuf);
6297                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6298                                      "Ambiguous use of %c resolved as operator %c",
6299                                      lastchar, lastchar);
6300                 }
6301                 TOKEN(WORD);
6302             }
6303
6304         case KEY___FILE__:
6305             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6306                                         newSVpv(CopFILE(PL_curcop),0));
6307             TERM(THING);
6308
6309         case KEY___LINE__:
6310             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6311                                     Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
6312             TERM(THING);
6313
6314         case KEY___PACKAGE__:
6315             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6316                                         (PL_curstash
6317                                          ? newSVhek(HvNAME_HEK(PL_curstash))
6318                                          : &PL_sv_undef));
6319             TERM(THING);
6320
6321         case KEY___DATA__:
6322         case KEY___END__: {
6323             GV *gv;
6324             if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
6325                 const char *pname = "main";
6326                 if (PL_tokenbuf[2] == 'D')
6327                     pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
6328                 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), GV_ADD,
6329                                 SVt_PVIO);
6330                 GvMULTI_on(gv);
6331                 if (!GvIO(gv))
6332                     GvIOp(gv) = newIO();
6333                 IoIFP(GvIOp(gv)) = PL_rsfp;
6334 #if defined(HAS_FCNTL) && defined(F_SETFD)
6335                 {
6336                     const int fd = PerlIO_fileno(PL_rsfp);
6337                     fcntl(fd,F_SETFD,fd >= 3);
6338                 }
6339 #endif
6340                 /* Mark this internal pseudo-handle as clean */
6341                 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
6342                 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
6343                     IoTYPE(GvIOp(gv)) = IoTYPE_STD;
6344                 else
6345                     IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
6346 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
6347                 /* if the script was opened in binmode, we need to revert
6348                  * it to text mode for compatibility; but only iff it has CRs
6349                  * XXX this is a questionable hack at best. */
6350                 if (PL_bufend-PL_bufptr > 2
6351                     && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
6352                 {
6353                     Off_t loc = 0;
6354                     if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
6355                         loc = PerlIO_tell(PL_rsfp);
6356                         (void)PerlIO_seek(PL_rsfp, 0L, 0);
6357                     }
6358 #ifdef NETWARE
6359                         if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
6360 #else
6361                     if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
6362 #endif  /* NETWARE */
6363 #ifdef PERLIO_IS_STDIO /* really? */
6364 #  if defined(__BORLANDC__)
6365                         /* XXX see note in do_binmode() */
6366                         ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
6367 #  endif
6368 #endif
6369                         if (loc > 0)
6370                             PerlIO_seek(PL_rsfp, loc, 0);
6371                     }
6372                 }
6373 #endif
6374 #ifdef PERLIO_LAYERS
6375                 if (!IN_BYTES) {
6376                     if (UTF)
6377                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
6378                     else if (PL_encoding) {
6379                         SV *name;
6380                         dSP;
6381                         ENTER;
6382                         SAVETMPS;
6383                         PUSHMARK(sp);
6384                         EXTEND(SP, 1);
6385                         XPUSHs(PL_encoding);
6386                         PUTBACK;
6387                         call_method("name", G_SCALAR);
6388                         SPAGAIN;
6389                         name = POPs;
6390                         PUTBACK;
6391                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
6392                                             Perl_form(aTHX_ ":encoding(%"SVf")",
6393                                                       SVfARG(name)));
6394                         FREETMPS;
6395                         LEAVE;
6396                     }
6397                 }
6398 #endif
6399 #ifdef PERL_MAD
6400                 if (PL_madskills) {
6401                     if (PL_realtokenstart >= 0) {
6402                         char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
6403                         if (!PL_endwhite)
6404                             PL_endwhite = newSVpvs("");
6405                         sv_catsv(PL_endwhite, PL_thiswhite);
6406                         PL_thiswhite = 0;
6407                         sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
6408                         PL_realtokenstart = -1;
6409                     }
6410                     while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
6411                            != NULL) ;
6412                 }
6413 #endif
6414                 PL_rsfp = NULL;
6415             }
6416             goto fake_eof;
6417         }
6418
6419         case KEY_AUTOLOAD:
6420         case KEY_DESTROY:
6421         case KEY_BEGIN:
6422         case KEY_UNITCHECK:
6423         case KEY_CHECK:
6424         case KEY_INIT:
6425         case KEY_END:
6426             if (PL_expect == XSTATE) {
6427                 s = PL_bufptr;
6428                 goto really_sub;
6429             }
6430             goto just_a_word;
6431
6432         case KEY_CORE:
6433             if (*s == ':' && s[1] == ':') {
6434                 s += 2;
6435                 d = s;
6436                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6437                 if (!(tmp = keyword(PL_tokenbuf, len, 0)))
6438                     Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
6439                 if (tmp < 0)
6440                     tmp = -tmp;
6441                 else if (tmp == KEY_require || tmp == KEY_do)
6442                     /* that's a way to remember we saw "CORE::" */
6443                     orig_keyword = tmp;
6444                 goto reserved_word;
6445             }
6446             goto just_a_word;
6447
6448         case KEY_abs:
6449             UNI(OP_ABS);
6450
6451         case KEY_alarm:
6452             UNI(OP_ALARM);
6453
6454         case KEY_accept:
6455             LOP(OP_ACCEPT,XTERM);
6456
6457         case KEY_and:
6458             OPERATOR(ANDOP);
6459
6460         case KEY_atan2:
6461             LOP(OP_ATAN2,XTERM);
6462
6463         case KEY_bind:
6464             LOP(OP_BIND,XTERM);
6465
6466         case KEY_binmode:
6467             LOP(OP_BINMODE,XTERM);
6468
6469         case KEY_bless:
6470             LOP(OP_BLESS,XTERM);
6471
6472         case KEY_break:
6473             FUN0(OP_BREAK);
6474
6475         case KEY_chop:
6476             UNI(OP_CHOP);
6477
6478         case KEY_continue:
6479             /* When 'use switch' is in effect, continue has a dual
6480                life as a control operator. */
6481             {
6482                 if (!FEATURE_IS_ENABLED("switch"))
6483                     PREBLOCK(CONTINUE);
6484                 else {
6485                     /* We have to disambiguate the two senses of
6486                       "continue". If the next token is a '{' then
6487                       treat it as the start of a continue block;
6488                       otherwise treat it as a control operator.
6489                      */
6490                     s = skipspace(s);
6491                     if (*s == '{')
6492             PREBLOCK(CONTINUE);
6493                     else
6494                         FUN0(OP_CONTINUE);
6495                 }
6496             }
6497
6498         case KEY_chdir:
6499             /* may use HOME */
6500             (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
6501             UNI(OP_CHDIR);
6502
6503         case KEY_close:
6504             UNI(OP_CLOSE);
6505
6506         case KEY_closedir:
6507             UNI(OP_CLOSEDIR);
6508
6509         case KEY_cmp:
6510             Eop(OP_SCMP);
6511
6512         case KEY_caller:
6513             UNI(OP_CALLER);
6514
6515         case KEY_crypt:
6516 #ifdef FCRYPT
6517             if (!PL_cryptseen) {
6518                 PL_cryptseen = TRUE;
6519                 init_des();
6520             }
6521 #endif
6522             LOP(OP_CRYPT,XTERM);
6523
6524         case KEY_chmod:
6525             LOP(OP_CHMOD,XTERM);
6526
6527         case KEY_chown:
6528             LOP(OP_CHOWN,XTERM);
6529
6530         case KEY_connect:
6531             LOP(OP_CONNECT,XTERM);
6532
6533         case KEY_chr:
6534             UNI(OP_CHR);
6535
6536         case KEY_cos:
6537             UNI(OP_COS);
6538
6539         case KEY_chroot:
6540             UNI(OP_CHROOT);
6541
6542         case KEY_default:
6543             PREBLOCK(DEFAULT);
6544
6545         case KEY_do:
6546             s = SKIPSPACE1(s);
6547             if (*s == '{')
6548                 PRETERMBLOCK(DO);
6549             if (*s != '\'')
6550                 s = force_word(s,WORD,TRUE,TRUE,FALSE);
6551             if (orig_keyword == KEY_do) {
6552                 orig_keyword = 0;
6553                 pl_yylval.ival = 1;
6554             }
6555             else
6556                 pl_yylval.ival = 0;
6557             OPERATOR(DO);
6558
6559         case KEY_die:
6560             PL_hints |= HINT_BLOCK_SCOPE;
6561             LOP(OP_DIE,XTERM);
6562
6563         case KEY_defined:
6564             UNI(OP_DEFINED);
6565
6566         case KEY_delete:
6567             UNI(OP_DELETE);
6568
6569         case KEY_dbmopen:
6570             gv_fetchpvs("AnyDBM_File::ISA", GV_ADDMULTI, SVt_PVAV);
6571             LOP(OP_DBMOPEN,XTERM);
6572
6573         case KEY_dbmclose:
6574             UNI(OP_DBMCLOSE);
6575
6576         case KEY_dump:
6577             s = force_word(s,WORD,TRUE,FALSE,FALSE);
6578             LOOPX(OP_DUMP);
6579
6580         case KEY_else:
6581             PREBLOCK(ELSE);
6582
6583         case KEY_elsif:
6584             pl_yylval.ival = CopLINE(PL_curcop);
6585             OPERATOR(ELSIF);
6586
6587         case KEY_eq:
6588             Eop(OP_SEQ);
6589
6590         case KEY_exists:
6591             UNI(OP_EXISTS);
6592
6593         case KEY_exit:
6594             if (PL_madskills)
6595                 UNI(OP_INT);
6596             UNI(OP_EXIT);
6597
6598         case KEY_eval:
6599             s = SKIPSPACE1(s);
6600             PL_expect = (*s == '{') ? XTERMBLOCK : XTERM;
6601             UNIBRACK(OP_ENTEREVAL);
6602
6603         case KEY_eof:
6604             UNI(OP_EOF);
6605
6606         case KEY_exp:
6607             UNI(OP_EXP);
6608
6609         case KEY_each:
6610             UNI(OP_EACH);
6611
6612         case KEY_exec:
6613             LOP(OP_EXEC,XREF);
6614
6615         case KEY_endhostent:
6616             FUN0(OP_EHOSTENT);
6617
6618         case KEY_endnetent:
6619             FUN0(OP_ENETENT);
6620
6621         case KEY_endservent:
6622             FUN0(OP_ESERVENT);
6623
6624         case KEY_endprotoent:
6625             FUN0(OP_EPROTOENT);
6626
6627         case KEY_endpwent:
6628             FUN0(OP_EPWENT);
6629
6630         case KEY_endgrent:
6631             FUN0(OP_EGRENT);
6632
6633         case KEY_for:
6634         case KEY_foreach:
6635             pl_yylval.ival = CopLINE(PL_curcop);
6636             s = SKIPSPACE1(s);
6637             if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
6638                 char *p = s;
6639 #ifdef PERL_MAD
6640                 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
6641 #endif
6642
6643                 if ((PL_bufend - p) >= 3 &&
6644                     strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
6645                     p += 2;
6646                 else if ((PL_bufend - p) >= 4 &&
6647                     strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
6648                     p += 3;
6649                 p = PEEKSPACE(p);
6650                 if (isIDFIRST_lazy_if(p,UTF)) {
6651                     p = scan_ident(p, PL_bufend,
6652                         PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
6653                     p = PEEKSPACE(p);
6654                 }
6655                 if (*p != '$')
6656                     Perl_croak(aTHX_ "Missing $ on loop variable");
6657 #ifdef PERL_MAD
6658                 s = SvPVX(PL_linestr) + soff;
6659 #endif
6660             }
6661             OPERATOR(FOR);
6662
6663         case KEY_formline:
6664             LOP(OP_FORMLINE,XTERM);
6665
6666         case KEY_fork:
6667             FUN0(OP_FORK);
6668
6669         case KEY_fcntl:
6670             LOP(OP_FCNTL,XTERM);
6671
6672         case KEY_fileno:
6673             UNI(OP_FILENO);
6674
6675         case KEY_flock:
6676             LOP(OP_FLOCK,XTERM);
6677
6678         case KEY_gt:
6679             Rop(OP_SGT);
6680
6681         case KEY_ge:
6682             Rop(OP_SGE);
6683
6684         case KEY_grep:
6685             LOP(OP_GREPSTART, XREF);
6686
6687         case KEY_goto:
6688             s = force_word(s,WORD,TRUE,FALSE,FALSE);
6689             LOOPX(OP_GOTO);
6690
6691         case KEY_gmtime:
6692             UNI(OP_GMTIME);
6693
6694         case KEY_getc:
6695             UNIDOR(OP_GETC);
6696
6697         case KEY_getppid:
6698             FUN0(OP_GETPPID);
6699
6700         case KEY_getpgrp:
6701             UNI(OP_GETPGRP);
6702
6703         case KEY_getpriority:
6704             LOP(OP_GETPRIORITY,XTERM);
6705
6706         case KEY_getprotobyname:
6707             UNI(OP_GPBYNAME);
6708
6709         case KEY_getprotobynumber:
6710             LOP(OP_GPBYNUMBER,XTERM);
6711
6712         case KEY_getprotoent:
6713             FUN0(OP_GPROTOENT);
6714
6715         case KEY_getpwent:
6716             FUN0(OP_GPWENT);
6717
6718         case KEY_getpwnam:
6719             UNI(OP_GPWNAM);
6720
6721         case KEY_getpwuid:
6722             UNI(OP_GPWUID);
6723
6724         case KEY_getpeername:
6725             UNI(OP_GETPEERNAME);
6726
6727         case KEY_gethostbyname:
6728             UNI(OP_GHBYNAME);
6729
6730         case KEY_gethostbyaddr:
6731             LOP(OP_GHBYADDR,XTERM);
6732
6733         case KEY_gethostent:
6734             FUN0(OP_GHOSTENT);
6735
6736         case KEY_getnetbyname:
6737             UNI(OP_GNBYNAME);
6738
6739         case KEY_getnetbyaddr:
6740             LOP(OP_GNBYADDR,XTERM);
6741
6742         case KEY_getnetent:
6743             FUN0(OP_GNETENT);
6744
6745         case KEY_getservbyname:
6746             LOP(OP_GSBYNAME,XTERM);
6747
6748         case KEY_getservbyport:
6749             LOP(OP_GSBYPORT,XTERM);
6750
6751         case KEY_getservent:
6752             FUN0(OP_GSERVENT);
6753
6754         case KEY_getsockname:
6755             UNI(OP_GETSOCKNAME);
6756
6757         case KEY_getsockopt:
6758             LOP(OP_GSOCKOPT,XTERM);
6759
6760         case KEY_getgrent:
6761             FUN0(OP_GGRENT);
6762
6763         case KEY_getgrnam:
6764             UNI(OP_GGRNAM);
6765
6766         case KEY_getgrgid:
6767             UNI(OP_GGRGID);
6768
6769         case KEY_getlogin:
6770             FUN0(OP_GETLOGIN);
6771
6772         case KEY_given:
6773             pl_yylval.ival = CopLINE(PL_curcop);
6774             OPERATOR(GIVEN);
6775
6776         case KEY_glob:
6777             LOP(OP_GLOB,XTERM);
6778
6779         case KEY_hex:
6780             UNI(OP_HEX);
6781
6782         case KEY_if:
6783             pl_yylval.ival = CopLINE(PL_curcop);
6784             OPERATOR(IF);
6785
6786         case KEY_index:
6787             LOP(OP_INDEX,XTERM);
6788
6789         case KEY_int:
6790             UNI(OP_INT);
6791
6792         case KEY_ioctl:
6793             LOP(OP_IOCTL,XTERM);
6794
6795         case KEY_join:
6796             LOP(OP_JOIN,XTERM);
6797
6798         case KEY_keys:
6799             UNI(OP_KEYS);
6800
6801         case KEY_kill:
6802             LOP(OP_KILL,XTERM);
6803
6804         case KEY_last:
6805             s = force_word(s,WORD,TRUE,FALSE,FALSE);
6806             LOOPX(OP_LAST);
6807
6808         case KEY_lc:
6809             UNI(OP_LC);
6810
6811         case KEY_lcfirst:
6812             UNI(OP_LCFIRST);
6813
6814         case KEY_local:
6815             pl_yylval.ival = 0;
6816             OPERATOR(LOCAL);
6817
6818         case KEY_length:
6819             UNI(OP_LENGTH);
6820
6821         case KEY_lt:
6822             Rop(OP_SLT);
6823
6824         case KEY_le:
6825             Rop(OP_SLE);
6826
6827         case KEY_localtime:
6828             UNI(OP_LOCALTIME);
6829
6830         case KEY_log:
6831             UNI(OP_LOG);
6832
6833         case KEY_link:
6834             LOP(OP_LINK,XTERM);
6835
6836         case KEY_listen:
6837             LOP(OP_LISTEN,XTERM);
6838
6839         case KEY_lock:
6840             UNI(OP_LOCK);
6841
6842         case KEY_lstat:
6843             UNI(OP_LSTAT);
6844
6845         case KEY_m:
6846             s = scan_pat(s,OP_MATCH);
6847             TERM(sublex_start());
6848
6849         case KEY_map:
6850             LOP(OP_MAPSTART, XREF);
6851
6852         case KEY_mkdir:
6853             LOP(OP_MKDIR,XTERM);
6854
6855         case KEY_msgctl:
6856             LOP(OP_MSGCTL,XTERM);
6857
6858         case KEY_msgget:
6859             LOP(OP_MSGGET,XTERM);
6860
6861         case KEY_msgrcv:
6862             LOP(OP_MSGRCV,XTERM);
6863
6864         case KEY_msgsnd:
6865             LOP(OP_MSGSND,XTERM);
6866
6867         case KEY_our:
6868         case KEY_my:
6869         case KEY_state:
6870             PL_in_my = (U16)tmp;
6871             s = SKIPSPACE1(s);
6872             if (isIDFIRST_lazy_if(s,UTF)) {
6873 #ifdef PERL_MAD
6874                 char* start = s;
6875 #endif
6876                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
6877                 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
6878                     goto really_sub;
6879                 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
6880                 if (!PL_in_my_stash) {
6881                     char tmpbuf[1024];
6882                     PL_bufptr = s;
6883                     my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
6884                     yyerror(tmpbuf);
6885                 }
6886 #ifdef PERL_MAD
6887                 if (PL_madskills) {     /* just add type to declarator token */
6888                     sv_catsv(PL_thistoken, PL_nextwhite);
6889                     PL_nextwhite = 0;
6890                     sv_catpvn(PL_thistoken, start, s - start);
6891                 }
6892 #endif
6893             }
6894             pl_yylval.ival = 1;
6895             OPERATOR(MY);
6896
6897         case KEY_next:
6898             s = force_word(s,WORD,TRUE,FALSE,FALSE);
6899             LOOPX(OP_NEXT);
6900
6901         case KEY_ne:
6902             Eop(OP_SNE);
6903
6904         case KEY_no:
6905             s = tokenize_use(0, s);
6906             OPERATOR(USE);
6907
6908         case KEY_not:
6909             if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
6910                 FUN1(OP_NOT);
6911             else
6912                 OPERATOR(NOTOP);
6913
6914         case KEY_open:
6915             s = SKIPSPACE1(s);
6916             if (isIDFIRST_lazy_if(s,UTF)) {
6917                 const char *t;
6918                 for (d = s; isALNUM_lazy_if(d,UTF);)
6919                     d++;
6920                 for (t=d; isSPACE(*t);)
6921                     t++;
6922                 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
6923                     /* [perl #16184] */
6924                     && !(t[0] == '=' && t[1] == '>')
6925                 ) {
6926                     int parms_len = (int)(d-s);
6927                     Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
6928                            "Precedence problem: open %.*s should be open(%.*s)",
6929                             parms_len, s, parms_len, s);
6930                 }
6931             }
6932             LOP(OP_OPEN,XTERM);
6933
6934         case KEY_or:
6935             pl_yylval.ival = OP_OR;
6936             OPERATOR(OROP);
6937
6938         case KEY_ord:
6939             UNI(OP_ORD);
6940
6941         case KEY_oct:
6942             UNI(OP_OCT);
6943
6944         case KEY_opendir:
6945             LOP(OP_OPEN_DIR,XTERM);
6946
6947         case KEY_print:
6948             checkcomma(s,PL_tokenbuf,"filehandle");
6949             LOP(OP_PRINT,XREF);
6950
6951         case KEY_printf:
6952             checkcomma(s,PL_tokenbuf,"filehandle");
6953             LOP(OP_PRTF,XREF);
6954
6955         case KEY_prototype:
6956             UNI(OP_PROTOTYPE);
6957
6958         case KEY_push:
6959             LOP(OP_PUSH,XTERM);
6960
6961         case KEY_pop:
6962             UNIDOR(OP_POP);
6963
6964         case KEY_pos:
6965             UNIDOR(OP_POS);
6966
6967         case KEY_pack:
6968             LOP(OP_PACK,XTERM);
6969
6970         case KEY_package:
6971             s = force_word(s,WORD,FALSE,TRUE,FALSE);
6972             s = force_version(s, FALSE);
6973             OPERATOR(PACKAGE);
6974
6975         case KEY_pipe:
6976             LOP(OP_PIPE_OP,XTERM);
6977
6978         case KEY_q:
6979             s = scan_str(s,!!PL_madskills,FALSE);
6980             if (!s)
6981                 missingterm(NULL);
6982             pl_yylval.ival = OP_CONST;
6983             TERM(sublex_start());
6984
6985         case KEY_quotemeta:
6986             UNI(OP_QUOTEMETA);
6987
6988         case KEY_qw:
6989             s = scan_str(s,!!PL_madskills,FALSE);
6990             if (!s)
6991                 missingterm(NULL);
6992             PL_expect = XOPERATOR;
6993             force_next(')');
6994             if (SvCUR(PL_lex_stuff)) {
6995                 OP *words = NULL;
6996                 int warned = 0;
6997                 d = SvPV_force(PL_lex_stuff, len);
6998                 while (len) {
6999                     for (; isSPACE(*d) && len; --len, ++d)
7000                         /**/;
7001                     if (len) {
7002                         SV *sv;
7003                         const char *b = d;
7004                         if (!warned && ckWARN(WARN_QW)) {
7005                             for (; !isSPACE(*d) && len; --len, ++d) {
7006                                 if (*d == ',') {
7007                                     Perl_warner(aTHX_ packWARN(WARN_QW),
7008                                         "Possible attempt to separate words with commas");
7009                                     ++warned;
7010                                 }
7011                                 else if (*d == '#') {
7012                                     Perl_warner(aTHX_ packWARN(WARN_QW),
7013                                         "Possible attempt to put comments in qw() list");
7014                                     ++warned;
7015                                 }
7016                             }
7017                         }
7018                         else {
7019                             for (; !isSPACE(*d) && len; --len, ++d)
7020                                 /**/;
7021                         }
7022                         sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
7023                         words = append_elem(OP_LIST, words,
7024                                             newSVOP(OP_CONST, 0, tokeq(sv)));
7025                     }
7026                 }
7027                 if (words) {
7028                     start_force(PL_curforce);
7029                     NEXTVAL_NEXTTOKE.opval = words;
7030                     force_next(THING);
7031                 }
7032             }
7033             if (PL_lex_stuff) {
7034                 SvREFCNT_dec(PL_lex_stuff);
7035                 PL_lex_stuff = NULL;
7036             }
7037             PL_expect = XTERM;
7038             TOKEN('(');
7039
7040         case KEY_qq:
7041             s = scan_str(s,!!PL_madskills,FALSE);
7042             if (!s)
7043                 missingterm(NULL);
7044             pl_yylval.ival = OP_STRINGIFY;
7045             if (SvIVX(PL_lex_stuff) == '\'')
7046                 SvIV_set(PL_lex_stuff, 0);      /* qq'$foo' should intepolate */
7047             TERM(sublex_start());
7048
7049         case KEY_qr:
7050             s = scan_pat(s,OP_QR);
7051             TERM(sublex_start());
7052
7053         case KEY_qx:
7054             s = scan_str(s,!!PL_madskills,FALSE);
7055             if (!s)
7056                 missingterm(NULL);
7057             readpipe_override();
7058             TERM(sublex_start());
7059
7060         case KEY_return:
7061             OLDLOP(OP_RETURN);
7062
7063         case KEY_require:
7064             s = SKIPSPACE1(s);
7065             if (isDIGIT(*s)) {
7066                 s = force_version(s, FALSE);
7067             }
7068             else if (*s != 'v' || !isDIGIT(s[1])
7069                     || (s = force_version(s, TRUE), *s == 'v'))
7070             {
7071                 *PL_tokenbuf = '\0';
7072                 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7073                 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
7074                     gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), GV_ADD);
7075                 else if (*s == '<')
7076                     yyerror("<> should be quotes");
7077             }
7078             if (orig_keyword == KEY_require) {
7079                 orig_keyword = 0;
7080                 pl_yylval.ival = 1;
7081             }
7082             else
7083                 pl_yylval.ival = 0;
7084             PL_expect = XTERM;
7085             PL_bufptr = s;
7086             PL_last_uni = PL_oldbufptr;
7087             PL_last_lop_op = OP_REQUIRE;
7088             s = skipspace(s);
7089             return REPORT( (int)REQUIRE );
7090
7091         case KEY_reset:
7092             UNI(OP_RESET);
7093
7094         case KEY_redo:
7095             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7096             LOOPX(OP_REDO);
7097
7098         case KEY_rename:
7099             LOP(OP_RENAME,XTERM);
7100
7101         case KEY_rand:
7102             UNI(OP_RAND);
7103
7104         case KEY_rmdir:
7105             UNI(OP_RMDIR);
7106
7107         case KEY_rindex:
7108             LOP(OP_RINDEX,XTERM);
7109
7110         case KEY_read:
7111             LOP(OP_READ,XTERM);
7112
7113         case KEY_readdir:
7114             UNI(OP_READDIR);
7115
7116         case KEY_readline:
7117             UNIDOR(OP_READLINE);
7118
7119         case KEY_readpipe:
7120             UNIDOR(OP_BACKTICK);
7121
7122         case KEY_rewinddir:
7123             UNI(OP_REWINDDIR);
7124
7125         case KEY_recv:
7126             LOP(OP_RECV,XTERM);
7127
7128         case KEY_reverse:
7129             LOP(OP_REVERSE,XTERM);
7130
7131         case KEY_readlink:
7132             UNIDOR(OP_READLINK);
7133
7134         case KEY_ref:
7135             UNI(OP_REF);
7136
7137         case KEY_s:
7138             s = scan_subst(s);
7139             if (pl_yylval.opval)
7140                 TERM(sublex_start());
7141             else
7142                 TOKEN(1);       /* force error */
7143
7144         case KEY_say:
7145             checkcomma(s,PL_tokenbuf,"filehandle");
7146             LOP(OP_SAY,XREF);
7147
7148         case KEY_chomp:
7149             UNI(OP_CHOMP);
7150
7151         case KEY_scalar:
7152             UNI(OP_SCALAR);
7153
7154         case KEY_select:
7155             LOP(OP_SELECT,XTERM);
7156
7157         case KEY_seek:
7158             LOP(OP_SEEK,XTERM);
7159
7160         case KEY_semctl:
7161             LOP(OP_SEMCTL,XTERM);
7162
7163         case KEY_semget:
7164             LOP(OP_SEMGET,XTERM);
7165
7166         case KEY_semop:
7167             LOP(OP_SEMOP,XTERM);
7168
7169         case KEY_send:
7170             LOP(OP_SEND,XTERM);
7171
7172         case KEY_setpgrp:
7173             LOP(OP_SETPGRP,XTERM);
7174
7175         case KEY_setpriority:
7176             LOP(OP_SETPRIORITY,XTERM);
7177
7178         case KEY_sethostent:
7179             UNI(OP_SHOSTENT);
7180
7181         case KEY_setnetent:
7182             UNI(OP_SNETENT);
7183
7184         case KEY_setservent:
7185             UNI(OP_SSERVENT);
7186
7187         case KEY_setprotoent:
7188             UNI(OP_SPROTOENT);
7189
7190         case KEY_setpwent:
7191             FUN0(OP_SPWENT);
7192
7193         case KEY_setgrent:
7194             FUN0(OP_SGRENT);
7195
7196         case KEY_seekdir:
7197             LOP(OP_SEEKDIR,XTERM);
7198
7199         case KEY_setsockopt:
7200             LOP(OP_SSOCKOPT,XTERM);
7201
7202         case KEY_shift:
7203             UNIDOR(OP_SHIFT);
7204
7205         case KEY_shmctl:
7206             LOP(OP_SHMCTL,XTERM);
7207
7208         case KEY_shmget:
7209             LOP(OP_SHMGET,XTERM);
7210
7211         case KEY_shmread:
7212             LOP(OP_SHMREAD,XTERM);
7213
7214         case KEY_shmwrite:
7215             LOP(OP_SHMWRITE,XTERM);
7216
7217         case KEY_shutdown:
7218             LOP(OP_SHUTDOWN,XTERM);
7219
7220         case KEY_sin:
7221             UNI(OP_SIN);
7222
7223         case KEY_sleep:
7224             UNI(OP_SLEEP);
7225
7226         case KEY_socket:
7227             LOP(OP_SOCKET,XTERM);
7228
7229         case KEY_socketpair:
7230             LOP(OP_SOCKPAIR,XTERM);
7231
7232         case KEY_sort:
7233             checkcomma(s,PL_tokenbuf,"subroutine name");
7234             s = SKIPSPACE1(s);
7235             if (*s == ';' || *s == ')')         /* probably a close */
7236                 Perl_croak(aTHX_ "sort is now a reserved word");
7237             PL_expect = XTERM;
7238             s = force_word(s,WORD,TRUE,TRUE,FALSE);
7239             LOP(OP_SORT,XREF);
7240
7241         case KEY_split:
7242             LOP(OP_SPLIT,XTERM);
7243
7244         case KEY_sprintf:
7245             LOP(OP_SPRINTF,XTERM);
7246
7247         case KEY_splice:
7248             LOP(OP_SPLICE,XTERM);
7249
7250         case KEY_sqrt:
7251             UNI(OP_SQRT);
7252
7253         case KEY_srand:
7254             UNI(OP_SRAND);
7255
7256         case KEY_stat:
7257             UNI(OP_STAT);
7258
7259         case KEY_study:
7260             UNI(OP_STUDY);
7261
7262         case KEY_substr:
7263             LOP(OP_SUBSTR,XTERM);
7264
7265         case KEY_format:
7266         case KEY_sub:
7267           really_sub:
7268             {
7269                 char tmpbuf[sizeof PL_tokenbuf];
7270                 SSize_t tboffset = 0;
7271                 expectation attrful;
7272                 bool have_name, have_proto;
7273                 const int key = tmp;
7274
7275 #ifdef PERL_MAD
7276                 SV *tmpwhite = 0;
7277
7278                 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7279                 SV *subtoken = newSVpvn(tstart, s - tstart);
7280                 PL_thistoken = 0;
7281
7282                 d = s;
7283                 s = SKIPSPACE2(s,tmpwhite);
7284 #else
7285                 s = skipspace(s);
7286 #endif
7287
7288                 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
7289                     (*s == ':' && s[1] == ':'))
7290                 {
7291 #ifdef PERL_MAD
7292                     SV *nametoke = NULL;
7293 #endif
7294
7295                     PL_expect = XBLOCK;
7296                     attrful = XATTRBLOCK;
7297                     /* remember buffer pos'n for later force_word */
7298                     tboffset = s - PL_oldbufptr;
7299                     d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
7300 #ifdef PERL_MAD
7301                     if (PL_madskills)
7302                         nametoke = newSVpvn(s, d - s);
7303 #endif
7304                     if (memchr(tmpbuf, ':', len))
7305                         sv_setpvn(PL_subname, tmpbuf, len);
7306                     else {
7307                         sv_setsv(PL_subname,PL_curstname);
7308                         sv_catpvs(PL_subname,"::");
7309                         sv_catpvn(PL_subname,tmpbuf,len);
7310                     }
7311                     have_name = TRUE;
7312
7313 #ifdef PERL_MAD
7314
7315                     start_force(0);
7316                     CURMAD('X', nametoke);
7317                     CURMAD('_', tmpwhite);
7318                     (void) force_word(PL_oldbufptr + tboffset, WORD,
7319                                       FALSE, TRUE, TRUE);
7320
7321                     s = SKIPSPACE2(d,tmpwhite);
7322 #else
7323                     s = skipspace(d);
7324 #endif
7325                 }
7326                 else {
7327                     if (key == KEY_my)
7328                         Perl_croak(aTHX_ "Missing name in \"my sub\"");
7329                     PL_expect = XTERMBLOCK;
7330                     attrful = XATTRTERM;
7331                     sv_setpvs(PL_subname,"?");
7332                     have_name = FALSE;
7333                 }
7334
7335                 if (key == KEY_format) {
7336                     if (*s == '=')
7337                         PL_lex_formbrack = PL_lex_brackets + 1;
7338 #ifdef PERL_MAD
7339                     PL_thistoken = subtoken;
7340                     s = d;
7341 #else
7342                     if (have_name)
7343                         (void) force_word(PL_oldbufptr + tboffset, WORD,
7344                                           FALSE, TRUE, TRUE);
7345 #endif
7346                     OPERATOR(FORMAT);
7347                 }
7348
7349                 /* Look for a prototype */
7350                 if (*s == '(') {
7351                     char *p;
7352                     bool bad_proto = FALSE;
7353                     bool in_brackets = FALSE;
7354                     char greedy_proto = ' ';
7355                     bool proto_after_greedy_proto = FALSE;
7356                     bool must_be_last = FALSE;
7357                     bool underscore = FALSE;
7358                     bool seen_underscore = FALSE;
7359                     const bool warnsyntax = ckWARN(WARN_SYNTAX);
7360
7361                     s = scan_str(s,!!PL_madskills,FALSE);
7362                     if (!s)
7363                         Perl_croak(aTHX_ "Prototype not terminated");
7364                     /* strip spaces and check for bad characters */
7365                     d = SvPVX(PL_lex_stuff);
7366                     tmp = 0;
7367                     for (p = d; *p; ++p) {
7368                         if (!isSPACE(*p)) {
7369                             d[tmp++] = *p;
7370
7371                             if (warnsyntax) {
7372                                 if (must_be_last)
7373                                     proto_after_greedy_proto = TRUE;
7374                                 if (!strchr("$@%*;[]&\\_", *p)) {
7375                                     bad_proto = TRUE;
7376                                 }
7377                                 else {
7378                                     if ( underscore ) {
7379                                         if ( *p != ';' )
7380                                             bad_proto = TRUE;
7381                                         underscore = FALSE;
7382                                     }
7383                                     if ( *p == '[' ) {
7384                                         in_brackets = TRUE;
7385                                     }
7386                                     else if ( *p == ']' ) {
7387                                         in_brackets = FALSE;
7388                                     }
7389                                     else if ( (*p == '@' || *p == '%') &&
7390                                          ( tmp < 2 || d[tmp-2] != '\\' ) &&
7391                                          !in_brackets ) {
7392                                         must_be_last = TRUE;
7393                                         greedy_proto = *p;
7394                                     }
7395                                     else if ( *p == '_' ) {
7396                                         underscore = seen_underscore = TRUE;
7397                                     }
7398                                 }
7399                             }
7400                         }
7401                     }
7402                     d[tmp] = '\0';
7403                     if (proto_after_greedy_proto)
7404                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
7405                                     "Prototype after '%c' for %"SVf" : %s",
7406                                     greedy_proto, SVfARG(PL_subname), d);
7407                     if (bad_proto)
7408                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
7409                                     "Illegal character %sin prototype for %"SVf" : %s",
7410                                     seen_underscore ? "after '_' " : "",
7411                                     SVfARG(PL_subname), d);
7412                     SvCUR_set(PL_lex_stuff, tmp);
7413                     have_proto = TRUE;
7414
7415 #ifdef PERL_MAD
7416                     start_force(0);
7417                     CURMAD('q', PL_thisopen);
7418                     CURMAD('_', tmpwhite);
7419                     CURMAD('=', PL_thisstuff);
7420                     CURMAD('Q', PL_thisclose);
7421                     NEXTVAL_NEXTTOKE.opval =
7422                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
7423                     PL_lex_stuff = NULL;
7424                     force_next(THING);
7425
7426                     s = SKIPSPACE2(s,tmpwhite);
7427 #else
7428                     s = skipspace(s);
7429 #endif
7430                 }
7431                 else
7432                     have_proto = FALSE;
7433
7434                 if (*s == ':' && s[1] != ':')
7435                     PL_expect = attrful;
7436                 else if (*s != '{' && key == KEY_sub) {
7437                     if (!have_name)
7438                         Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
7439                     else if (*s != ';')
7440                         Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
7441                 }
7442
7443 #ifdef PERL_MAD
7444                 start_force(0);
7445                 if (tmpwhite) {
7446                     if (PL_madskills)
7447                         curmad('^', newSVpvs(""));
7448                     CURMAD('_', tmpwhite);
7449                 }
7450                 force_next(0);
7451
7452                 PL_thistoken = subtoken;
7453 #else
7454                 if (have_proto) {
7455                     NEXTVAL_NEXTTOKE.opval =
7456                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
7457                     PL_lex_stuff = NULL;
7458                     force_next(THING);
7459                 }
7460 #endif
7461                 if (!have_name) {
7462                     if (PL_curstash)
7463                         sv_setpvs(PL_subname, "__ANON__");
7464                     else
7465                         sv_setpvs(PL_subname, "__ANON__::__ANON__");
7466                     TOKEN(ANONSUB);
7467                 }
7468 #ifndef PERL_MAD
7469                 (void) force_word(PL_oldbufptr + tboffset, WORD,
7470                                   FALSE, TRUE, TRUE);
7471 #endif
7472                 if (key == KEY_my)
7473                     TOKEN(MYSUB);
7474                 TOKEN(SUB);
7475             }
7476
7477         case KEY_system:
7478             LOP(OP_SYSTEM,XREF);
7479
7480         case KEY_symlink:
7481             LOP(OP_SYMLINK,XTERM);
7482
7483         case KEY_syscall:
7484             LOP(OP_SYSCALL,XTERM);
7485
7486         case KEY_sysopen:
7487             LOP(OP_SYSOPEN,XTERM);
7488
7489         case KEY_sysseek:
7490             LOP(OP_SYSSEEK,XTERM);
7491
7492         case KEY_sysread:
7493             LOP(OP_SYSREAD,XTERM);
7494
7495         case KEY_syswrite:
7496             LOP(OP_SYSWRITE,XTERM);
7497
7498         case KEY_tr:
7499             s = scan_trans(s);
7500             TERM(sublex_start());
7501
7502         case KEY_tell:
7503             UNI(OP_TELL);
7504
7505         case KEY_telldir:
7506             UNI(OP_TELLDIR);
7507
7508         case KEY_tie:
7509             LOP(OP_TIE,XTERM);
7510
7511         case KEY_tied:
7512             UNI(OP_TIED);
7513
7514         case KEY_time:
7515             FUN0(OP_TIME);
7516
7517         case KEY_times:
7518             FUN0(OP_TMS);
7519
7520         case KEY_truncate:
7521             LOP(OP_TRUNCATE,XTERM);
7522
7523         case KEY_uc:
7524             UNI(OP_UC);
7525
7526         case KEY_ucfirst:
7527             UNI(OP_UCFIRST);
7528
7529         case KEY_untie:
7530             UNI(OP_UNTIE);
7531
7532         case KEY_until:
7533             pl_yylval.ival = CopLINE(PL_curcop);
7534             OPERATOR(UNTIL);
7535
7536         case KEY_unless:
7537             pl_yylval.ival = CopLINE(PL_curcop);
7538             OPERATOR(UNLESS);
7539
7540         case KEY_unlink:
7541             LOP(OP_UNLINK,XTERM);
7542
7543         case KEY_undef:
7544             UNIDOR(OP_UNDEF);
7545
7546         case KEY_unpack:
7547             LOP(OP_UNPACK,XTERM);
7548
7549         case KEY_utime:
7550             LOP(OP_UTIME,XTERM);
7551
7552         case KEY_umask:
7553             UNIDOR(OP_UMASK);
7554
7555         case KEY_unshift:
7556             LOP(OP_UNSHIFT,XTERM);
7557
7558         case KEY_use:
7559             s = tokenize_use(1, s);
7560             OPERATOR(USE);
7561
7562         case KEY_values:
7563             UNI(OP_VALUES);
7564
7565         case KEY_vec:
7566             LOP(OP_VEC,XTERM);
7567
7568         case KEY_when:
7569             pl_yylval.ival = CopLINE(PL_curcop);
7570             OPERATOR(WHEN);
7571
7572         case KEY_while:
7573             pl_yylval.ival = CopLINE(PL_curcop);
7574             OPERATOR(WHILE);
7575
7576         case KEY_warn:
7577             PL_hints |= HINT_BLOCK_SCOPE;
7578             LOP(OP_WARN,XTERM);
7579
7580         case KEY_wait:
7581             FUN0(OP_WAIT);
7582
7583         case KEY_waitpid:
7584             LOP(OP_WAITPID,XTERM);
7585
7586         case KEY_wantarray:
7587             FUN0(OP_WANTARRAY);
7588
7589         case KEY_write:
7590 #ifdef EBCDIC
7591         {
7592             char ctl_l[2];
7593             ctl_l[0] = toCTRL('L');
7594             ctl_l[1] = '\0';
7595             gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
7596         }
7597 #else
7598             /* Make sure $^L is defined */
7599             gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
7600 #endif
7601             UNI(OP_ENTERWRITE);
7602
7603         case KEY_x:
7604             if (PL_expect == XOPERATOR)
7605                 Mop(OP_REPEAT);
7606             check_uni();
7607             goto just_a_word;
7608
7609         case KEY_xor:
7610             pl_yylval.ival = OP_XOR;
7611             OPERATOR(OROP);
7612
7613         case KEY_y:
7614             s = scan_trans(s);
7615             TERM(sublex_start());
7616         }
7617     }}
7618 }
7619 #ifdef __SC__
7620 #pragma segment Main
7621 #endif
7622
7623 static int
7624 S_pending_ident(pTHX)
7625 {
7626     dVAR;
7627     register char *d;
7628     PADOFFSET tmp = 0;
7629     /* pit holds the identifier we read and pending_ident is reset */
7630     char pit = PL_pending_ident;
7631     const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
7632     /* All routes through this function want to know if there is a colon.  */
7633     const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
7634     PL_pending_ident = 0;
7635
7636     /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
7637     DEBUG_T({ PerlIO_printf(Perl_debug_log,
7638           "### Pending identifier '%s'\n", PL_tokenbuf); });
7639
7640     /* if we're in a my(), we can't allow dynamics here.
7641        $foo'bar has already been turned into $foo::bar, so
7642        just check for colons.
7643
7644        if it's a legal name, the OP is a PADANY.
7645     */
7646     if (PL_in_my) {
7647         if (PL_in_my == KEY_our) {      /* "our" is merely analogous to "my" */
7648             if (has_colon)
7649                 yyerror(Perl_form(aTHX_ "No package name allowed for "
7650                                   "variable %s in \"our\"",
7651                                   PL_tokenbuf));
7652             tmp = allocmy(PL_tokenbuf, tokenbuf_len, 0);
7653         }
7654         else {
7655             if (has_colon)
7656                 yyerror(Perl_form(aTHX_ PL_no_myglob,
7657                             PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf));
7658
7659             pl_yylval.opval = newOP(OP_PADANY, 0);
7660             pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len, 0);
7661             return PRIVATEREF;
7662         }
7663     }
7664
7665     /*
7666        build the ops for accesses to a my() variable.
7667
7668        Deny my($a) or my($b) in a sort block, *if* $a or $b is
7669        then used in a comparison.  This catches most, but not
7670        all cases.  For instance, it catches
7671            sort { my($a); $a <=> $b }
7672        but not
7673            sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
7674        (although why you'd do that is anyone's guess).
7675     */
7676
7677     if (!has_colon) {
7678         if (!PL_in_my)
7679             tmp = pad_findmy(PL_tokenbuf, tokenbuf_len, 0);
7680         if (tmp != NOT_IN_PAD) {
7681             /* might be an "our" variable" */
7682             if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
7683                 /* build ops for a bareword */
7684                 HV *  const stash = PAD_COMPNAME_OURSTASH(tmp);
7685                 HEK * const stashname = HvNAME_HEK(stash);
7686                 SV *  const sym = newSVhek(stashname);
7687                 sv_catpvs(sym, "::");
7688                 sv_catpvn(sym, PL_tokenbuf+1, tokenbuf_len - 1);
7689                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
7690                 pl_yylval.opval->op_private = OPpCONST_ENTERED;
7691                 gv_fetchsv(sym,
7692                     (PL_in_eval
7693                         ? (GV_ADDMULTI | GV_ADDINEVAL)
7694                         : GV_ADDMULTI
7695                     ),
7696                     ((PL_tokenbuf[0] == '$') ? SVt_PV
7697                      : (PL_tokenbuf[0] == '@') ? SVt_PVAV
7698                      : SVt_PVHV));
7699                 return WORD;
7700             }
7701
7702             /* if it's a sort block and they're naming $a or $b */
7703             if (PL_last_lop_op == OP_SORT &&
7704                 PL_tokenbuf[0] == '$' &&
7705                 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
7706                 && !PL_tokenbuf[2])
7707             {
7708                 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
7709                      d < PL_bufend && *d != '\n';
7710                      d++)
7711                 {
7712                     if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
7713                         Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
7714                               PL_tokenbuf);
7715                     }
7716                 }
7717             }
7718
7719             pl_yylval.opval = newOP(OP_PADANY, 0);
7720             pl_yylval.opval->op_targ = tmp;
7721             return PRIVATEREF;
7722         }
7723     }
7724
7725     /*
7726        Whine if they've said @foo in a doublequoted string,
7727        and @foo isn't a variable we can find in the symbol
7728        table.
7729     */
7730     if (ckWARN(WARN_AMBIGUOUS) &&
7731         pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
7732         GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1, 0,
7733                                          SVt_PVAV);
7734         if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
7735                 /* DO NOT warn for @- and @+ */
7736                 && !( PL_tokenbuf[2] == '\0' &&
7737                     ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
7738            )
7739         {
7740             /* Downgraded from fatal to warning 20000522 mjd */
7741             Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
7742                         "Possible unintended interpolation of %s in string",
7743                         PL_tokenbuf);
7744         }
7745     }
7746
7747     /* build ops for a bareword */
7748     pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn(PL_tokenbuf + 1,
7749                                                       tokenbuf_len - 1));
7750     pl_yylval.opval->op_private = OPpCONST_ENTERED;
7751     gv_fetchpvn_flags(
7752             PL_tokenbuf + 1, tokenbuf_len - 1,
7753             /* If the identifier refers to a stash, don't autovivify it.
7754              * Change 24660 had the side effect of causing symbol table
7755              * hashes to always be defined, even if they were freshly
7756              * created and the only reference in the entire program was
7757              * the single statement with the defined %foo::bar:: test.
7758              * It appears that all code in the wild doing this actually
7759              * wants to know whether sub-packages have been loaded, so
7760              * by avoiding auto-vivifying symbol tables, we ensure that
7761              * defined %foo::bar:: continues to be false, and the existing
7762              * tests still give the expected answers, even though what
7763              * they're actually testing has now changed subtly.
7764              */
7765             (*PL_tokenbuf == '%'
7766              && *(d = PL_tokenbuf + tokenbuf_len - 1) == ':'
7767              && d[-1] == ':'
7768              ? 0
7769              : PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD),
7770             ((PL_tokenbuf[0] == '$') ? SVt_PV
7771              : (PL_tokenbuf[0] == '@') ? SVt_PVAV
7772              : SVt_PVHV));
7773     return WORD;
7774 }
7775
7776 /*
7777  *  The following code was generated by perl_keyword.pl.
7778  */
7779
7780 I32
7781 Perl_keyword (pTHX_ const char *name, I32 len, bool all_keywords)
7782 {
7783     dVAR;
7784
7785     PERL_ARGS_ASSERT_KEYWORD;
7786
7787   switch (len)
7788   {
7789     case 1: /* 5 tokens of length 1 */
7790       switch (name[0])
7791       {
7792         case 'm':
7793           {                                       /* m          */
7794             return KEY_m;
7795           }
7796
7797         case 'q':
7798           {                                       /* q          */
7799             return KEY_q;
7800           }
7801
7802         case 's':
7803           {                                       /* s          */
7804             return KEY_s;
7805           }
7806
7807         case 'x':
7808           {                                       /* x          */
7809             return -KEY_x;
7810           }
7811
7812         case 'y':
7813           {                                       /* y          */
7814             return KEY_y;
7815           }
7816
7817         default:
7818           goto unknown;
7819       }
7820
7821     case 2: /* 18 tokens of length 2 */
7822       switch (name[0])
7823       {
7824         case 'd':
7825           if (name[1] == 'o')
7826           {                                       /* do         */
7827             return KEY_do;
7828           }
7829
7830           goto unknown;
7831
7832         case 'e':
7833           if (name[1] == 'q')
7834           {                                       /* eq         */
7835             return -KEY_eq;
7836           }
7837
7838           goto unknown;
7839
7840         case 'g':
7841           switch (name[1])
7842           {
7843             case 'e':
7844               {                                   /* ge         */
7845                 return -KEY_ge;
7846               }
7847
7848             case 't':
7849               {                                   /* gt         */
7850                 return -KEY_gt;
7851               }
7852
7853             default:
7854               goto unknown;
7855           }
7856
7857         case 'i':
7858           if (name[1] == 'f')
7859           {                                       /* if         */
7860             return KEY_if;
7861           }
7862
7863           goto unknown;
7864
7865         case 'l':
7866           switch (name[1])
7867           {
7868             case 'c':
7869               {                                   /* lc         */
7870                 return -KEY_lc;
7871               }
7872
7873             case 'e':
7874               {                                   /* le         */
7875                 return -KEY_le;
7876               }
7877
7878             case 't':
7879               {                                   /* lt         */
7880                 return -KEY_lt;
7881               }
7882
7883             default:
7884               goto unknown;
7885           }
7886
7887         case 'm':
7888           if (name[1] == 'y')
7889           {                                       /* my         */
7890             return KEY_my;
7891           }
7892
7893           goto unknown;
7894
7895         case 'n':
7896           switch (name[1])
7897           {
7898             case 'e':
7899               {                                   /* ne         */
7900                 return -KEY_ne;
7901               }
7902
7903             case 'o':
7904               {                                   /* no         */
7905                 return KEY_no;
7906               }
7907
7908             default:
7909               goto unknown;
7910           }
7911
7912         case 'o':
7913           if (name[1] == 'r')
7914           {                                       /* or         */
7915             return -KEY_or;
7916           }
7917
7918           goto unknown;
7919
7920         case 'q':
7921           switch (name[1])
7922           {
7923             case 'q':
7924               {                                   /* qq         */
7925                 return KEY_qq;
7926               }
7927
7928             case 'r':
7929               {                                   /* qr         */
7930                 return KEY_qr;
7931               }
7932
7933             case 'w':
7934               {                                   /* qw         */
7935                 return KEY_qw;
7936               }
7937
7938             case 'x':
7939               {                                   /* qx         */
7940                 return KEY_qx;
7941               }
7942
7943             default:
7944               goto unknown;
7945           }
7946
7947         case 't':
7948           if (name[1] == 'r')
7949           {                                       /* tr         */
7950             return KEY_tr;
7951           }
7952
7953           goto unknown;
7954
7955         case 'u':
7956           if (name[1] == 'c')
7957           {                                       /* uc         */
7958             return -KEY_uc;
7959           }
7960
7961           goto unknown;
7962
7963         default:
7964           goto unknown;
7965       }
7966
7967     case 3: /* 29 tokens of length 3 */
7968       switch (name[0])
7969       {
7970         case 'E':
7971           if (name[1] == 'N' &&
7972               name[2] == 'D')
7973           {                                       /* END        */
7974             return KEY_END;
7975           }
7976
7977           goto unknown;
7978
7979         case 'a':
7980           switch (name[1])
7981           {
7982             case 'b':
7983               if (name[2] == 's')
7984               {                                   /* abs        */
7985                 return -KEY_abs;
7986               }
7987
7988               goto unknown;
7989
7990             case 'n':
7991               if (name[2] == 'd')
7992               {                                   /* and        */
7993                 return -KEY_and;
7994               }
7995
7996               goto unknown;
7997
7998             default:
7999               goto unknown;
8000           }
8001
8002         case 'c':
8003           switch (name[1])
8004           {
8005             case 'h':
8006               if (name[2] == 'r')
8007               {                                   /* chr        */
8008                 return -KEY_chr;
8009               }
8010
8011               goto unknown;
8012
8013             case 'm':
8014               if (name[2] == 'p')
8015               {                                   /* cmp        */
8016                 return -KEY_cmp;
8017               }
8018
8019               goto unknown;
8020
8021             case 'o':
8022               if (name[2] == 's')
8023               {                                   /* cos        */
8024                 return -KEY_cos;
8025               }
8026
8027               goto unknown;
8028
8029             default:
8030               goto unknown;
8031           }
8032
8033         case 'd':
8034           if (name[1] == 'i' &&
8035               name[2] == 'e')
8036           {                                       /* die        */
8037             return -KEY_die;
8038           }
8039
8040           goto unknown;
8041
8042         case 'e':
8043           switch (name[1])
8044           {
8045             case 'o':
8046               if (name[2] == 'f')
8047               {                                   /* eof        */
8048                 return -KEY_eof;
8049               }
8050
8051               goto unknown;
8052
8053             case 'x':
8054               if (name[2] == 'p')
8055               {                                   /* exp        */
8056                 return -KEY_exp;
8057               }
8058
8059               goto unknown;
8060
8061             default:
8062               goto unknown;
8063           }
8064
8065         case 'f':
8066           if (name[1] == 'o' &&
8067               name[2] == 'r')
8068           {                                       /* for        */
8069             return KEY_for;
8070           }
8071
8072           goto unknown;
8073
8074         case 'h':
8075           if (name[1] == 'e' &&
8076               name[2] == 'x')
8077           {                                       /* hex        */
8078             return -KEY_hex;
8079           }
8080
8081           goto unknown;
8082
8083         case 'i':
8084           if (name[1] == 'n' &&
8085               name[2] == 't')
8086           {                                       /* int        */
8087             return -KEY_int;
8088           }
8089
8090           goto unknown;
8091
8092         case 'l':
8093           if (name[1] == 'o' &&
8094               name[2] == 'g')
8095           {                                       /* log        */
8096             return -KEY_log;
8097           }
8098
8099           goto unknown;
8100
8101         case 'm':
8102           if (name[1] == 'a' &&
8103               name[2] == 'p')
8104           {                                       /* map        */
8105             return KEY_map;
8106           }
8107
8108           goto unknown;
8109
8110         case 'n':
8111           if (name[1] == 'o' &&
8112               name[2] == 't')
8113           {                                       /* not        */
8114             return -KEY_not;
8115           }
8116
8117           goto unknown;
8118
8119         case 'o':
8120           switch (name[1])
8121           {
8122             case 'c':
8123               if (name[2] == 't')
8124               {                                   /* oct        */
8125                 return -KEY_oct;
8126               }
8127
8128               goto unknown;
8129
8130             case 'r':
8131               if (name[2] == 'd')
8132               {                                   /* ord        */
8133                 return -KEY_ord;
8134               }
8135
8136               goto unknown;
8137
8138             case 'u':
8139               if (name[2] == 'r')
8140               {                                   /* our        */
8141                 return KEY_our;
8142               }
8143
8144               goto unknown;
8145
8146             default:
8147               goto unknown;
8148           }
8149
8150         case 'p':
8151           if (name[1] == 'o')
8152           {
8153             switch (name[2])
8154             {
8155               case 'p':
8156                 {                                 /* pop        */
8157                   return -KEY_pop;
8158                 }
8159
8160               case 's':
8161                 {                                 /* pos        */
8162                   return KEY_pos;
8163                 }
8164
8165               default:
8166                 goto unknown;
8167             }
8168           }
8169
8170           goto unknown;
8171
8172         case 'r':
8173           if (name[1] == 'e' &&
8174               name[2] == 'f')
8175           {                                       /* ref        */
8176             return -KEY_ref;
8177           }
8178
8179           goto unknown;
8180
8181         case 's':
8182           switch (name[1])
8183           {
8184             case 'a':
8185               if (name[2] == 'y')
8186               {                                   /* say        */
8187                 return (all_keywords || FEATURE_IS_ENABLED("say") ? KEY_say : 0);
8188               }
8189
8190               goto unknown;
8191
8192             case 'i':
8193               if (name[2] == 'n')
8194               {                                   /* sin        */
8195                 return -KEY_sin;
8196               }
8197
8198               goto unknown;
8199
8200             case 'u':
8201               if (name[2] == 'b')
8202               {                                   /* sub        */
8203                 return KEY_sub;
8204               }
8205
8206               goto unknown;
8207
8208             default:
8209               goto unknown;
8210           }
8211
8212         case 't':
8213           if (name[1] == 'i' &&
8214               name[2] == 'e')
8215           {                                       /* tie        */
8216             return KEY_tie;
8217           }
8218
8219           goto unknown;
8220
8221         case 'u':
8222           if (name[1] == 's' &&
8223               name[2] == 'e')
8224           {                                       /* use        */
8225             return KEY_use;
8226           }
8227
8228           goto unknown;
8229
8230         case 'v':
8231           if (name[1] == 'e' &&
8232               name[2] == 'c')
8233           {                                       /* vec        */
8234             return -KEY_vec;
8235           }
8236
8237           goto unknown;
8238
8239         case 'x':
8240           if (name[1] == 'o' &&
8241               name[2] == 'r')
8242           {                                       /* xor        */
8243             return -KEY_xor;
8244           }
8245
8246           goto unknown;
8247
8248         default:
8249           goto unknown;
8250       }
8251
8252     case 4: /* 41 tokens of length 4 */
8253       switch (name[0])
8254       {
8255         case 'C':
8256           if (name[1] == 'O' &&
8257               name[2] == 'R' &&
8258               name[3] == 'E')
8259           {                                       /* CORE       */
8260             return -KEY_CORE;
8261           }
8262
8263           goto unknown;
8264
8265         case 'I':
8266           if (name[1] == 'N' &&
8267               name[2] == 'I' &&
8268               name[3] == 'T')
8269           {                                       /* INIT       */
8270             return KEY_INIT;
8271           }
8272
8273           goto unknown;
8274
8275         case 'b':
8276           if (name[1] == 'i' &&
8277               name[2] == 'n' &&
8278               name[3] == 'd')
8279           {                                       /* bind       */
8280             return -KEY_bind;
8281           }
8282
8283           goto unknown;
8284
8285         case 'c':
8286           if (name[1] == 'h' &&
8287               name[2] == 'o' &&
8288               name[3] == 'p')
8289           {                                       /* chop       */
8290             return -KEY_chop;
8291           }
8292
8293           goto unknown;
8294
8295         case 'd':
8296           if (name[1] == 'u' &&
8297               name[2] == 'm' &&
8298               name[3] == 'p')
8299           {                                       /* dump       */
8300             return -KEY_dump;
8301           }
8302
8303           goto unknown;
8304
8305         case 'e':
8306           switch (name[1])
8307           {
8308             case 'a':
8309               if (name[2] == 'c' &&
8310                   name[3] == 'h')
8311               {                                   /* each       */
8312                 return -KEY_each;
8313               }
8314
8315               goto unknown;
8316
8317             case 'l':
8318               if (name[2] == 's' &&
8319                   name[3] == 'e')
8320               {                                   /* else       */
8321                 return KEY_else;
8322               }
8323
8324               goto unknown;
8325
8326             case 'v':
8327               if (name[2] == 'a' &&
8328                   name[3] == 'l')
8329               {                                   /* eval       */
8330                 return KEY_eval;
8331               }
8332
8333               goto unknown;
8334
8335             case 'x':
8336               switch (name[2])
8337               {
8338                 case 'e':
8339                   if (name[3] == 'c')
8340                   {                               /* exec       */
8341                     return -KEY_exec;
8342                   }
8343
8344                   goto unknown;
8345
8346                 case 'i':
8347                   if (name[3] == 't')
8348                   {                               /* exit       */
8349                     return -KEY_exit;
8350                   }
8351
8352                   goto unknown;
8353
8354                 default:
8355                   goto unknown;
8356               }
8357
8358             default:
8359               goto unknown;
8360           }
8361
8362         case 'f':
8363           if (name[1] == 'o' &&
8364               name[2] == 'r' &&
8365               name[3] == 'k')
8366           {                                       /* fork       */
8367             return -KEY_fork;
8368           }
8369
8370           goto unknown;
8371
8372         case 'g':
8373           switch (name[1])
8374           {
8375             case 'e':
8376               if (name[2] == 't' &&
8377                   name[3] == 'c')
8378               {                                   /* getc       */
8379                 return -KEY_getc;
8380               }
8381
8382               goto unknown;
8383
8384             case 'l':
8385               if (name[2] == 'o' &&
8386                   name[3] == 'b')
8387               {                                   /* glob       */
8388                 return KEY_glob;
8389               }
8390
8391               goto unknown;
8392
8393             case 'o':
8394               if (name[2] == 't' &&
8395                   name[3] == 'o')
8396               {                                   /* goto       */
8397                 return KEY_goto;
8398               }
8399
8400               goto unknown;
8401
8402             case 'r':
8403               if (name[2] == 'e' &&
8404                   name[3] == 'p')
8405               {                                   /* grep       */
8406                 return KEY_grep;
8407               }
8408
8409               goto unknown;
8410
8411             default:
8412               goto unknown;
8413           }
8414
8415         case 'j':
8416           if (name[1] == 'o' &&
8417               name[2] == 'i' &&
8418               name[3] == 'n')
8419           {                                       /* join       */
8420             return -KEY_join;
8421           }
8422
8423           goto unknown;
8424
8425         case 'k':
8426           switch (name[1])
8427           {
8428             case 'e':
8429               if (name[2] == 'y' &&
8430                   name[3] == 's')
8431               {                                   /* keys       */
8432                 return -KEY_keys;
8433               }
8434
8435               goto unknown;
8436
8437             case 'i':
8438               if (name[2] == 'l' &&
8439                   name[3] == 'l')
8440               {                                   /* kill       */
8441                 return -KEY_kill;
8442               }
8443
8444               goto unknown;
8445
8446             default:
8447               goto unknown;
8448           }
8449
8450         case 'l':
8451           switch (name[1])
8452           {
8453             case 'a':
8454               if (name[2] == 's' &&
8455                   name[3] == 't')
8456               {                                   /* last       */
8457                 return KEY_last;
8458               }
8459
8460               goto unknown;
8461
8462             case 'i':
8463               if (name[2] == 'n' &&
8464                   name[3] == 'k')
8465               {                                   /* link       */
8466                 return -KEY_link;
8467               }
8468
8469               goto unknown;
8470
8471             case 'o':
8472               if (name[2] == 'c' &&
8473                   name[3] == 'k')
8474               {                                   /* lock       */
8475                 return -KEY_lock;
8476               }
8477
8478               goto unknown;
8479
8480             default:
8481               goto unknown;
8482           }
8483
8484         case 'n':
8485           if (name[1] == 'e' &&
8486               name[2] == 'x' &&
8487               name[3] == 't')
8488           {                                       /* next       */
8489             return KEY_next;
8490           }
8491
8492           goto unknown;
8493
8494         case 'o':
8495           if (name[1] == 'p' &&
8496               name[2] == 'e' &&
8497               name[3] == 'n')
8498           {                                       /* open       */
8499             return -KEY_open;
8500           }
8501
8502           goto unknown;
8503
8504         case 'p':
8505           switch (name[1])
8506           {
8507             case 'a':
8508               if (name[2] == 'c' &&
8509                   name[3] == 'k')
8510               {                                   /* pack       */
8511                 return -KEY_pack;
8512               }
8513
8514               goto unknown;
8515
8516             case 'i':
8517               if (name[2] == 'p' &&
8518                   name[3] == 'e')
8519               {                                   /* pipe       */
8520                 return -KEY_pipe;
8521               }
8522
8523               goto unknown;
8524
8525             case 'u':
8526               if (name[2] == 's' &&
8527                   name[3] == 'h')
8528               {                                   /* push       */
8529                 return -KEY_push;
8530               }
8531
8532               goto unknown;
8533
8534             default:
8535               goto unknown;
8536           }
8537
8538         case 'r':
8539           switch (name[1])
8540           {
8541             case 'a':
8542               if (name[2] == 'n' &&
8543                   name[3] == 'd')
8544               {                                   /* rand       */
8545                 return -KEY_rand;
8546               }
8547
8548               goto unknown;
8549
8550             case 'e':
8551               switch (name[2])
8552               {
8553                 case 'a':
8554                   if (name[3] == 'd')
8555                   {                               /* read       */
8556                     return -KEY_read;
8557                   }
8558
8559                   goto unknown;
8560
8561                 case 'c':
8562                   if (name[3] == 'v')
8563                   {                               /* recv       */
8564                     return -KEY_recv;
8565                   }
8566
8567                   goto unknown;
8568
8569                 case 'd':
8570                   if (name[3] == 'o')
8571                   {                               /* redo       */
8572                     return KEY_redo;
8573                   }
8574
8575                   goto unknown;
8576
8577                 default:
8578                   goto unknown;
8579               }
8580
8581             default:
8582               goto unknown;
8583           }
8584
8585         case 's':
8586           switch (name[1])
8587           {
8588             case 'e':
8589               switch (name[2])
8590               {
8591                 case 'e':
8592                   if (name[3] == 'k')
8593                   {                               /* seek       */
8594                     return -KEY_seek;
8595                   }
8596
8597                   goto unknown;
8598
8599                 case 'n':
8600                   if (name[3] == 'd')
8601                   {                               /* send       */
8602                     return -KEY_send;
8603                   }
8604
8605                   goto unknown;
8606
8607                 default:
8608                   goto unknown;
8609               }
8610
8611             case 'o':
8612               if (name[2] == 'r' &&
8613                   name[3] == 't')
8614               {                                   /* sort       */
8615                 return KEY_sort;
8616               }
8617
8618               goto unknown;
8619
8620             case 'q':
8621               if (name[2] == 'r' &&
8622                   name[3] == 't')
8623               {                                   /* sqrt       */
8624                 return -KEY_sqrt;
8625               }
8626
8627               goto unknown;
8628
8629             case 't':
8630               if (name[2] == 'a' &&
8631                   name[3] == 't')
8632               {                                   /* stat       */
8633                 return -KEY_stat;
8634               }
8635
8636               goto unknown;
8637
8638             default:
8639               goto unknown;
8640           }
8641
8642         case 't':
8643           switch (name[1])
8644           {
8645             case 'e':
8646               if (name[2] == 'l' &&
8647                   name[3] == 'l')
8648               {                                   /* tell       */
8649                 return -KEY_tell;
8650               }
8651
8652               goto unknown;
8653
8654             case 'i':
8655               switch (name[2])
8656               {
8657                 case 'e':
8658                   if (name[3] == 'd')
8659                   {                               /* tied       */
8660                     return KEY_tied;
8661                   }
8662
8663                   goto unknown;
8664
8665                 case 'm':
8666                   if (name[3] == 'e')
8667                   {                               /* time       */
8668                     return -KEY_time;
8669                   }
8670
8671                   goto unknown;
8672
8673                 default:
8674                   goto unknown;
8675               }
8676
8677             default:
8678               goto unknown;
8679           }
8680
8681         case 'w':
8682           switch (name[1])
8683           {
8684             case 'a':
8685               switch (name[2])
8686               {
8687                 case 'i':
8688                   if (name[3] == 't')
8689                   {                               /* wait       */
8690                     return -KEY_wait;
8691                   }
8692
8693                   goto unknown;
8694
8695                 case 'r':
8696                   if (name[3] == 'n')
8697                   {                               /* warn       */
8698                     return -KEY_warn;
8699                   }
8700
8701                   goto unknown;
8702
8703                 default:
8704                   goto unknown;
8705               }
8706
8707             case 'h':
8708               if (name[2] == 'e' &&
8709                   name[3] == 'n')
8710               {                                   /* when       */
8711                 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_when : 0);
8712               }
8713
8714               goto unknown;
8715
8716             default:
8717               goto unknown;
8718           }
8719
8720         default:
8721           goto unknown;
8722       }
8723
8724     case 5: /* 39 tokens of length 5 */
8725       switch (name[0])
8726       {
8727         case 'B':
8728           if (name[1] == 'E' &&
8729               name[2] == 'G' &&
8730               name[3] == 'I' &&
8731               name[4] == 'N')
8732           {                                       /* BEGIN      */
8733             return KEY_BEGIN;
8734           }
8735
8736           goto unknown;
8737
8738         case 'C':
8739           if (name[1] == 'H' &&
8740               name[2] == 'E' &&
8741               name[3] == 'C' &&
8742               name[4] == 'K')
8743           {                                       /* CHECK      */
8744             return KEY_CHECK;
8745           }
8746
8747           goto unknown;
8748
8749         case 'a':
8750           switch (name[1])
8751           {
8752             case 'l':
8753               if (name[2] == 'a' &&
8754                   name[3] == 'r' &&
8755                   name[4] == 'm')
8756               {                                   /* alarm      */
8757                 return -KEY_alarm;
8758               }
8759
8760               goto unknown;
8761
8762             case 't':
8763               if (name[2] == 'a' &&
8764                   name[3] == 'n' &&
8765                   name[4] == '2')
8766               {                                   /* atan2      */
8767                 return -KEY_atan2;
8768               }
8769
8770               goto unknown;
8771
8772             default:
8773               goto unknown;
8774           }
8775
8776         case 'b':
8777           switch (name[1])
8778           {
8779             case 'l':
8780               if (name[2] == 'e' &&
8781                   name[3] == 's' &&
8782                   name[4] == 's')
8783               {                                   /* bless      */
8784                 return -KEY_bless;
8785               }
8786
8787               goto unknown;
8788
8789             case 'r':
8790               if (name[2] == 'e' &&
8791                   name[3] == 'a' &&
8792                   name[4] == 'k')
8793               {                                   /* break      */
8794                 return (all_keywords || FEATURE_IS_ENABLED("switch") ? -KEY_break : 0);
8795               }
8796
8797               goto unknown;
8798
8799             default:
8800               goto unknown;
8801           }
8802
8803         case 'c':
8804           switch (name[1])
8805           {
8806             case 'h':
8807               switch (name[2])
8808               {
8809                 case 'd':
8810                   if (name[3] == 'i' &&
8811                       name[4] == 'r')
8812                   {                               /* chdir      */
8813                     return -KEY_chdir;
8814                   }
8815
8816                   goto unknown;
8817
8818                 case 'm':
8819                   if (name[3] == 'o' &&
8820                       name[4] == 'd')
8821                   {                               /* chmod      */
8822                     return -KEY_chmod;
8823                   }
8824
8825                   goto unknown;
8826
8827                 case 'o':
8828                   switch (name[3])
8829                   {
8830                     case 'm':
8831                       if (name[4] == 'p')
8832                       {                           /* chomp      */
8833                         return -KEY_chomp;
8834                       }
8835
8836                       goto unknown;
8837
8838                     case 'w':
8839                       if (name[4] == 'n')
8840                       {                           /* chown      */
8841                         return -KEY_chown;
8842                       }
8843
8844                       goto unknown;
8845
8846                     default:
8847                       goto unknown;
8848                   }
8849
8850                 default:
8851                   goto unknown;
8852               }
8853
8854             case 'l':
8855               if (name[2] == 'o' &&
8856                   name[3] == 's' &&
8857                   name[4] == 'e')
8858               {                                   /* close      */
8859                 return -KEY_close;
8860               }
8861
8862               goto unknown;
8863
8864             case 'r':
8865               if (name[2] == 'y' &&
8866                   name[3] == 'p' &&
8867                   name[4] == 't')
8868               {                                   /* crypt      */
8869                 return -KEY_crypt;
8870               }
8871
8872               goto unknown;
8873
8874             default:
8875               goto unknown;
8876           }
8877
8878         case 'e':
8879           if (name[1] == 'l' &&
8880               name[2] == 's' &&
8881               name[3] == 'i' &&
8882               name[4] == 'f')
8883           {                                       /* elsif      */
8884             return KEY_elsif;
8885           }
8886
8887           goto unknown;
8888
8889         case 'f':
8890           switch (name[1])
8891           {
8892             case 'c':
8893               if (name[2] == 'n' &&
8894                   name[3] == 't' &&
8895                   name[4] == 'l')
8896               {                                   /* fcntl      */
8897                 return -KEY_fcntl;
8898               }
8899
8900               goto unknown;
8901
8902             case 'l':
8903               if (name[2] == 'o' &&
8904                   name[3] == 'c' &&
8905                   name[4] == 'k')
8906               {                                   /* flock      */
8907                 return -KEY_flock;
8908               }
8909
8910               goto unknown;
8911
8912             default:
8913               goto unknown;
8914           }
8915
8916         case 'g':
8917           if (name[1] == 'i' &&
8918               name[2] == 'v' &&
8919               name[3] == 'e' &&
8920               name[4] == 'n')
8921           {                                       /* given      */
8922             return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_given : 0);
8923           }
8924
8925           goto unknown;
8926
8927         case 'i':
8928           switch (name[1])
8929           {
8930             case 'n':
8931               if (name[2] == 'd' &&
8932                   name[3] == 'e' &&
8933                   name[4] == 'x')
8934               {                                   /* index      */
8935                 return -KEY_index;
8936               }
8937
8938               goto unknown;
8939
8940             case 'o':
8941               if (name[2] == 'c' &&
8942                   name[3] == 't' &&
8943                   name[4] == 'l')
8944               {                                   /* ioctl      */
8945                 return -KEY_ioctl;
8946               }
8947
8948               goto unknown;
8949
8950             default:
8951               goto unknown;
8952           }
8953
8954         case 'l':
8955           switch (name[1])
8956           {
8957             case 'o':
8958               if (name[2] == 'c' &&
8959                   name[3] == 'a' &&
8960                   name[4] == 'l')
8961               {                                   /* local      */
8962                 return KEY_local;
8963               }
8964
8965               goto unknown;
8966
8967             case 's':
8968               if (name[2] == 't' &&
8969                   name[3] == 'a' &&
8970                   name[4] == 't')
8971               {                                   /* lstat      */
8972                 return -KEY_lstat;
8973               }
8974
8975               goto unknown;
8976
8977             default:
8978               goto unknown;
8979           }
8980
8981         case 'm':
8982           if (name[1] == 'k' &&
8983               name[2] == 'd' &&
8984               name[3] == 'i' &&
8985               name[4] == 'r')
8986           {                                       /* mkdir      */
8987             return -KEY_mkdir;
8988           }
8989
8990           goto unknown;
8991
8992         case 'p':
8993           if (name[1] == 'r' &&
8994               name[2] == 'i' &&
8995               name[3] == 'n' &&
8996               name[4] == 't')
8997           {                                       /* print      */
8998             return KEY_print;
8999           }
9000
9001           goto unknown;
9002
9003         case 'r':
9004           switch (name[1])
9005           {
9006             case 'e':
9007               if (name[2] == 's' &&
9008                   name[3] == 'e' &&
9009                   name[4] == 't')
9010               {                                   /* reset      */
9011                 return -KEY_reset;
9012               }
9013
9014               goto unknown;
9015
9016             case 'm':
9017               if (name[2] == 'd' &&
9018                   name[3] == 'i' &&
9019                   name[4] == 'r')
9020               {                                   /* rmdir      */
9021                 return -KEY_rmdir;
9022               }
9023
9024               goto unknown;
9025
9026             default:
9027               goto unknown;
9028           }
9029
9030         case 's':
9031           switch (name[1])
9032           {
9033             case 'e':
9034               if (name[2] == 'm' &&
9035                   name[3] == 'o' &&
9036                   name[4] == 'p')
9037               {                                   /* semop      */
9038                 return -KEY_semop;
9039               }
9040
9041               goto unknown;
9042
9043             case 'h':
9044               if (name[2] == 'i' &&
9045                   name[3] == 'f' &&
9046                   name[4] == 't')
9047               {                                   /* shift      */
9048                 return -KEY_shift;
9049               }
9050
9051               goto unknown;
9052
9053             case 'l':
9054               if (name[2] == 'e' &&
9055                   name[3] == 'e' &&
9056                   name[4] == 'p')
9057               {                                   /* sleep      */
9058                 return -KEY_sleep;
9059               }
9060
9061               goto unknown;
9062
9063             case 'p':
9064               if (name[2] == 'l' &&
9065                   name[3] == 'i' &&
9066                   name[4] == 't')
9067               {                                   /* split      */
9068                 return KEY_split;
9069               }
9070
9071               goto unknown;
9072
9073             case 'r':
9074               if (name[2] == 'a' &&
9075                   name[3] == 'n' &&
9076                   name[4] == 'd')
9077               {                                   /* srand      */
9078                 return -KEY_srand;
9079               }
9080
9081               goto unknown;
9082
9083             case 't':
9084               switch (name[2])
9085               {
9086                 case 'a':
9087                   if (name[3] == 't' &&
9088                       name[4] == 'e')
9089                   {                               /* state      */
9090                     return (all_keywords || FEATURE_IS_ENABLED("state") ? KEY_state : 0);
9091                   }
9092
9093                   goto unknown;
9094
9095                 case 'u':
9096                   if (name[3] == 'd' &&
9097                       name[4] == 'y')
9098                   {                               /* study      */
9099                     return KEY_study;
9100                   }
9101
9102                   goto unknown;
9103
9104                 default:
9105                   goto unknown;
9106               }
9107
9108             default:
9109               goto unknown;
9110           }
9111
9112         case 't':
9113           if (name[1] == 'i' &&
9114               name[2] == 'm' &&
9115               name[3] == 'e' &&
9116               name[4] == 's')
9117           {                                       /* times      */
9118             return -KEY_times;
9119           }
9120
9121           goto unknown;
9122
9123         case 'u':
9124           switch (name[1])
9125           {
9126             case 'm':
9127               if (name[2] == 'a' &&
9128                   name[3] == 's' &&
9129                   name[4] == 'k')
9130               {                                   /* umask      */
9131                 return -KEY_umask;
9132               }
9133
9134               goto unknown;
9135
9136             case 'n':
9137               switch (name[2])
9138               {
9139                 case 'd':
9140                   if (name[3] == 'e' &&
9141                       name[4] == 'f')
9142                   {                               /* undef      */
9143                     return KEY_undef;
9144                   }
9145
9146                   goto unknown;
9147
9148                 case 't':
9149                   if (name[3] == 'i')
9150                   {
9151                     switch (name[4])
9152                     {
9153                       case 'e':
9154                         {                         /* untie      */
9155                           return KEY_untie;
9156                         }
9157
9158                       case 'l':
9159                         {                         /* until      */
9160                           return KEY_until;
9161                         }
9162
9163                       default:
9164                         goto unknown;
9165                     }
9166                   }
9167
9168                   goto unknown;
9169
9170                 default:
9171                   goto unknown;
9172               }
9173
9174             case 't':
9175               if (name[2] == 'i' &&
9176                   name[3] == 'm' &&
9177                   name[4] == 'e')
9178               {                                   /* utime      */
9179                 return -KEY_utime;
9180               }
9181
9182               goto unknown;
9183
9184             default:
9185               goto unknown;
9186           }
9187
9188         case 'w':
9189           switch (name[1])
9190           {
9191             case 'h':
9192               if (name[2] == 'i' &&
9193                   name[3] == 'l' &&
9194                   name[4] == 'e')
9195               {                                   /* while      */
9196                 return KEY_while;
9197               }
9198
9199               goto unknown;
9200
9201             case 'r':
9202               if (name[2] == 'i' &&
9203                   name[3] == 't' &&
9204                   name[4] == 'e')
9205               {                                   /* write      */
9206                 return -KEY_write;
9207               }
9208
9209               goto unknown;
9210
9211             default:
9212               goto unknown;
9213           }
9214
9215         default:
9216           goto unknown;
9217       }
9218
9219     case 6: /* 33 tokens of length 6 */
9220       switch (name[0])
9221       {
9222         case 'a':
9223           if (name[1] == 'c' &&
9224               name[2] == 'c' &&
9225               name[3] == 'e' &&
9226               name[4] == 'p' &&
9227               name[5] == 't')
9228           {                                       /* accept     */
9229             return -KEY_accept;
9230           }
9231
9232           goto unknown;
9233
9234         case 'c':
9235           switch (name[1])
9236           {
9237             case 'a':
9238               if (name[2] == 'l' &&
9239                   name[3] == 'l' &&
9240                   name[4] == 'e' &&
9241                   name[5] == 'r')
9242               {                                   /* caller     */
9243                 return -KEY_caller;
9244               }
9245
9246               goto unknown;
9247
9248             case 'h':
9249               if (name[2] == 'r' &&
9250                   name[3] == 'o' &&
9251                   name[4] == 'o' &&
9252                   name[5] == 't')
9253               {                                   /* chroot     */
9254                 return -KEY_chroot;
9255               }
9256
9257               goto unknown;
9258
9259             default:
9260               goto unknown;
9261           }
9262
9263         case 'd':
9264           if (name[1] == 'e' &&
9265               name[2] == 'l' &&
9266               name[3] == 'e' &&
9267               name[4] == 't' &&
9268               name[5] == 'e')
9269           {                                       /* delete     */
9270             return KEY_delete;
9271           }
9272
9273           goto unknown;
9274
9275         case 'e':
9276           switch (name[1])
9277           {
9278             case 'l':
9279               if (name[2] == 's' &&
9280                   name[3] == 'e' &&
9281                   name[4] == 'i' &&
9282                   name[5] == 'f')
9283               {                                   /* elseif     */
9284                   Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif");
9285               }
9286
9287               goto unknown;
9288
9289             case 'x':
9290               if (name[2] == 'i' &&
9291                   name[3] == 's' &&
9292                   name[4] == 't' &&
9293                   name[5] == 's')
9294               {                                   /* exists     */
9295                 return KEY_exists;
9296               }
9297
9298               goto unknown;
9299
9300             default:
9301               goto unknown;
9302           }
9303
9304         case 'f':
9305           switch (name[1])
9306           {
9307             case 'i':
9308               if (name[2] == 'l' &&
9309                   name[3] == 'e' &&
9310                   name[4] == 'n' &&
9311                   name[5] == 'o')
9312               {                                   /* fileno     */
9313                 return -KEY_fileno;
9314               }
9315
9316               goto unknown;
9317
9318             case 'o':
9319               if (name[2] == 'r' &&
9320                   name[3] == 'm' &&
9321                   name[4] == 'a' &&
9322                   name[5] == 't')
9323               {                                   /* format     */
9324                 return KEY_format;
9325               }
9326
9327               goto unknown;
9328
9329             default:
9330               goto unknown;
9331           }
9332
9333         case 'g':
9334           if (name[1] == 'm' &&
9335               name[2] == 't' &&
9336               name[3] == 'i' &&
9337               name[4] == 'm' &&
9338               name[5] == 'e')
9339           {                                       /* gmtime     */
9340             return -KEY_gmtime;
9341           }
9342
9343           goto unknown;
9344
9345         case 'l':
9346           switch (name[1])
9347           {
9348             case 'e':
9349               if (name[2] == 'n' &&
9350                   name[3] == 'g' &&
9351                   name[4] == 't' &&
9352                   name[5] == 'h')
9353               {                                   /* length     */
9354                 return -KEY_length;
9355               }
9356
9357               goto unknown;
9358
9359             case 'i':
9360               if (name[2] == 's' &&
9361                   name[3] == 't' &&
9362                   name[4] == 'e' &&
9363                   name[5] == 'n')
9364               {                                   /* listen     */
9365                 return -KEY_listen;
9366               }
9367
9368               goto unknown;
9369
9370             default:
9371               goto unknown;
9372           }
9373
9374         case 'm':
9375           if (name[1] == 's' &&
9376               name[2] == 'g')
9377           {
9378             switch (name[3])
9379             {
9380               case 'c':
9381                 if (name[4] == 't' &&
9382                     name[5] == 'l')
9383                 {                                 /* msgctl     */
9384                   return -KEY_msgctl;
9385                 }
9386
9387                 goto unknown;
9388
9389               case 'g':
9390                 if (name[4] == 'e' &&
9391                     name[5] == 't')
9392                 {                                 /* msgget     */
9393                   return -KEY_msgget;
9394                 }
9395
9396                 goto unknown;
9397
9398               case 'r':
9399                 if (name[4] == 'c' &&
9400                     name[5] == 'v')
9401                 {                                 /* msgrcv     */
9402                   return -KEY_msgrcv;
9403                 }
9404
9405                 goto unknown;
9406
9407               case 's':
9408                 if (name[4] == 'n' &&
9409                     name[5] == 'd')
9410                 {                                 /* msgsnd     */
9411                   return -KEY_msgsnd;
9412                 }
9413
9414                 goto unknown;
9415
9416               default:
9417                 goto unknown;
9418             }
9419           }
9420
9421           goto unknown;
9422
9423         case 'p':
9424           if (name[1] == 'r' &&
9425               name[2] == 'i' &&
9426               name[3] == 'n' &&
9427               name[4] == 't' &&
9428               name[5] == 'f')
9429           {                                       /* printf     */
9430             return KEY_printf;
9431           }
9432
9433           goto unknown;
9434
9435         case 'r':
9436           switch (name[1])
9437           {
9438             case 'e':
9439               switch (name[2])
9440               {
9441                 case 'n':
9442                   if (name[3] == 'a' &&
9443                       name[4] == 'm' &&
9444                       name[5] == 'e')
9445                   {                               /* rename     */
9446                     return -KEY_rename;
9447                   }
9448
9449                   goto unknown;
9450
9451                 case 't':
9452                   if (name[3] == 'u' &&
9453                       name[4] == 'r' &&
9454                       name[5] == 'n')
9455                   {                               /* return     */
9456                     return KEY_return;
9457                   }
9458
9459                   goto unknown;
9460
9461                 default:
9462                   goto unknown;
9463               }
9464
9465             case 'i':
9466               if (name[2] == 'n' &&
9467                   name[3] == 'd' &&
9468                   name[4] == 'e' &&
9469                   name[5] == 'x')
9470               {                                   /* rindex     */
9471                 return -KEY_rindex;
9472               }
9473
9474               goto unknown;
9475
9476             default:
9477               goto unknown;
9478           }
9479
9480         case 's':
9481           switch (name[1])
9482           {
9483             case 'c':
9484               if (name[2] == 'a' &&
9485                   name[3] == 'l' &&
9486                   name[4] == 'a' &&
9487                   name[5] == 'r')
9488               {                                   /* scalar     */
9489                 return KEY_scalar;
9490               }
9491
9492               goto unknown;
9493
9494             case 'e':
9495               switch (name[2])
9496               {
9497                 case 'l':
9498                   if (name[3] == 'e' &&
9499                       name[4] == 'c' &&
9500                       name[5] == 't')
9501                   {                               /* select     */
9502                     return -KEY_select;
9503                   }
9504
9505                   goto unknown;
9506
9507                 case 'm':
9508                   switch (name[3])
9509                   {
9510                     case 'c':
9511                       if (name[4] == 't' &&
9512                           name[5] == 'l')
9513                       {                           /* semctl     */
9514                         return -KEY_semctl;
9515                       }
9516
9517                       goto unknown;
9518
9519                     case 'g':
9520                       if (name[4] == 'e' &&
9521                           name[5] == 't')
9522                       {                           /* semget     */
9523                         return -KEY_semget;
9524                       }
9525
9526                       goto unknown;
9527
9528                     default:
9529                       goto unknown;
9530                   }
9531
9532                 default:
9533                   goto unknown;
9534               }
9535
9536             case 'h':
9537               if (name[2] == 'm')
9538               {
9539                 switch (name[3])
9540                 {
9541                   case 'c':
9542                     if (name[4] == 't' &&
9543                         name[5] == 'l')
9544                     {                             /* shmctl     */
9545                       return -KEY_shmctl;
9546                     }
9547
9548                     goto unknown;
9549
9550                   case 'g':
9551                     if (name[4] == 'e' &&
9552                         name[5] == 't')
9553                     {                             /* shmget     */
9554                       return -KEY_shmget;
9555                     }
9556
9557                     goto unknown;
9558
9559                   default:
9560                     goto unknown;
9561                 }
9562               }
9563
9564               goto unknown;
9565
9566             case 'o':
9567               if (name[2] == 'c' &&
9568                   name[3] == 'k' &&
9569                   name[4] == 'e' &&
9570                   name[5] == 't')
9571               {                                   /* socket     */
9572                 return -KEY_socket;
9573               }
9574
9575               goto unknown;
9576
9577             case 'p':
9578               if (name[2] == 'l' &&
9579                   name[3] == 'i' &&
9580                   name[4] == 'c' &&
9581                   name[5] == 'e')
9582               {                                   /* splice     */
9583                 return -KEY_splice;
9584               }
9585
9586               goto unknown;
9587
9588             case 'u':
9589               if (name[2] == 'b' &&
9590                   name[3] == 's' &&
9591                   name[4] == 't' &&
9592                   name[5] == 'r')
9593               {                                   /* substr     */
9594                 return -KEY_substr;
9595               }
9596
9597               goto unknown;
9598
9599             case 'y':
9600               if (name[2] == 's' &&
9601                   name[3] == 't' &&
9602                   name[4] == 'e' &&
9603                   name[5] == 'm')
9604               {                                   /* system     */
9605                 return -KEY_system;
9606               }
9607
9608               goto unknown;
9609
9610             default:
9611               goto unknown;
9612           }
9613
9614         case 'u':
9615           if (name[1] == 'n')
9616           {
9617             switch (name[2])
9618             {
9619               case 'l':
9620                 switch (name[3])
9621                 {
9622                   case 'e':
9623                     if (name[4] == 's' &&
9624                         name[5] == 's')
9625                     {                             /* unless     */
9626                       return KEY_unless;
9627                     }
9628
9629                     goto unknown;
9630
9631                   case 'i':
9632                     if (name[4] == 'n' &&
9633                         name[5] == 'k')
9634                     {                             /* unlink     */
9635                       return -KEY_unlink;
9636                     }
9637
9638                     goto unknown;
9639
9640                   default:
9641                     goto unknown;
9642                 }
9643
9644               case 'p':
9645                 if (name[3] == 'a' &&
9646                     name[4] == 'c' &&
9647                     name[5] == 'k')
9648                 {                                 /* unpack     */
9649                   return -KEY_unpack;
9650                 }
9651
9652                 goto unknown;
9653
9654               default:
9655                 goto unknown;
9656             }
9657           }
9658
9659           goto unknown;
9660
9661         case 'v':
9662           if (name[1] == 'a' &&
9663               name[2] == 'l' &&
9664               name[3] == 'u' &&
9665               name[4] == 'e' &&
9666               name[5] == 's')
9667           {                                       /* values     */
9668             return -KEY_values;
9669           }
9670
9671           goto unknown;
9672
9673         default:
9674           goto unknown;
9675       }
9676
9677     case 7: /* 29 tokens of length 7 */
9678       switch (name[0])
9679       {
9680         case 'D':
9681           if (name[1] == 'E' &&
9682               name[2] == 'S' &&
9683               name[3] == 'T' &&
9684               name[4] == 'R' &&
9685               name[5] == 'O' &&
9686               name[6] == 'Y')
9687           {                                       /* DESTROY    */
9688             return KEY_DESTROY;
9689           }
9690
9691           goto unknown;
9692
9693         case '_':
9694           if (name[1] == '_' &&
9695               name[2] == 'E' &&
9696               name[3] == 'N' &&
9697               name[4] == 'D' &&
9698               name[5] == '_' &&
9699               name[6] == '_')
9700           {                                       /* __END__    */
9701             return KEY___END__;
9702           }
9703
9704           goto unknown;
9705
9706         case 'b':
9707           if (name[1] == 'i' &&
9708               name[2] == 'n' &&
9709               name[3] == 'm' &&
9710               name[4] == 'o' &&
9711               name[5] == 'd' &&
9712               name[6] == 'e')
9713           {                                       /* binmode    */
9714             return -KEY_binmode;
9715           }
9716
9717           goto unknown;
9718
9719         case 'c':
9720           if (name[1] == 'o' &&
9721               name[2] == 'n' &&
9722               name[3] == 'n' &&
9723               name[4] == 'e' &&
9724               name[5] == 'c' &&
9725               name[6] == 't')
9726           {                                       /* connect    */
9727             return -KEY_connect;
9728           }
9729
9730           goto unknown;
9731
9732         case 'd':
9733           switch (name[1])
9734           {
9735             case 'b':
9736               if (name[2] == 'm' &&
9737                   name[3] == 'o' &&
9738                   name[4] == 'p' &&
9739                   name[5] == 'e' &&
9740                   name[6] == 'n')
9741               {                                   /* dbmopen    */
9742                 return -KEY_dbmopen;
9743               }
9744
9745               goto unknown;
9746
9747             case 'e':
9748               if (name[2] == 'f')
9749               {
9750                 switch (name[3])
9751                 {
9752                   case 'a':
9753                     if (name[4] == 'u' &&
9754                         name[5] == 'l' &&
9755                         name[6] == 't')
9756                     {                             /* default    */
9757                       return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_default : 0);
9758                     }
9759
9760                     goto unknown;
9761
9762                   case 'i':
9763                     if (name[4] == 'n' &&
9764                         name[5] == 'e' &&
9765                         name[6] == 'd')
9766                     {                             /* defined    */
9767                       return KEY_defined;
9768                     }
9769
9770                     goto unknown;
9771
9772                   default:
9773                     goto unknown;
9774                 }
9775               }
9776
9777               goto unknown;
9778
9779             default:
9780               goto unknown;
9781           }
9782
9783         case 'f':
9784           if (name[1] == 'o' &&
9785               name[2] == 'r' &&
9786               name[3] == 'e' &&
9787               name[4] == 'a' &&
9788               name[5] == 'c' &&
9789               name[6] == 'h')
9790           {                                       /* foreach    */
9791             return KEY_foreach;
9792           }
9793
9794           goto unknown;
9795
9796         case 'g':
9797           if (name[1] == 'e' &&
9798               name[2] == 't' &&
9799               name[3] == 'p')
9800           {
9801             switch (name[4])
9802             {
9803               case 'g':
9804                 if (name[5] == 'r' &&
9805                     name[6] == 'p')
9806                 {                                 /* getpgrp    */
9807                   return -KEY_getpgrp;
9808                 }
9809
9810                 goto unknown;
9811
9812               case 'p':
9813                 if (name[5] == 'i' &&
9814                     name[6] == 'd')
9815                 {                                 /* getppid    */
9816                   return -KEY_getppid;
9817                 }
9818
9819                 goto unknown;
9820
9821               default:
9822                 goto unknown;
9823             }
9824           }
9825
9826           goto unknown;
9827
9828         case 'l':
9829           if (name[1] == 'c' &&
9830               name[2] == 'f' &&
9831               name[3] == 'i' &&
9832               name[4] == 'r' &&
9833               name[5] == 's' &&
9834               name[6] == 't')
9835           {                                       /* lcfirst    */
9836             return -KEY_lcfirst;
9837           }
9838
9839           goto unknown;
9840
9841         case 'o':
9842           if (name[1] == 'p' &&
9843               name[2] == 'e' &&
9844               name[3] == 'n' &&
9845               name[4] == 'd' &&
9846               name[5] == 'i' &&
9847               name[6] == 'r')
9848           {                                       /* opendir    */
9849             return -KEY_opendir;
9850           }
9851
9852           goto unknown;
9853
9854         case 'p':
9855           if (name[1] == 'a' &&
9856               name[2] == 'c' &&
9857               name[3] == 'k' &&
9858               name[4] == 'a' &&
9859               name[5] == 'g' &&
9860               name[6] == 'e')
9861           {                                       /* package    */
9862             return KEY_package;
9863           }
9864
9865           goto unknown;
9866
9867         case 'r':
9868           if (name[1] == 'e')
9869           {
9870             switch (name[2])
9871             {
9872               case 'a':
9873                 if (name[3] == 'd' &&
9874                     name[4] == 'd' &&
9875                     name[5] == 'i' &&
9876                     name[6] == 'r')
9877                 {                                 /* readdir    */
9878                   return -KEY_readdir;
9879                 }
9880
9881                 goto unknown;
9882
9883               case 'q':
9884                 if (name[3] == 'u' &&
9885                     name[4] == 'i' &&
9886                     name[5] == 'r' &&
9887                     name[6] == 'e')
9888                 {                                 /* require    */
9889                   return KEY_require;
9890                 }
9891
9892                 goto unknown;
9893
9894               case 'v':
9895                 if (name[3] == 'e' &&
9896                     name[4] == 'r' &&
9897                     name[5] == 's' &&
9898                     name[6] == 'e')
9899                 {                                 /* reverse    */
9900                   return -KEY_reverse;
9901                 }
9902
9903                 goto unknown;
9904
9905               default:
9906                 goto unknown;
9907             }
9908           }
9909
9910           goto unknown;
9911
9912         case 's':
9913           switch (name[1])
9914           {
9915             case 'e':
9916               switch (name[2])
9917               {
9918                 case 'e':
9919                   if (name[3] == 'k' &&
9920                       name[4] == 'd' &&
9921                       name[5] == 'i' &&
9922                       name[6] == 'r')
9923                   {                               /* seekdir    */
9924                     return -KEY_seekdir;
9925                   }
9926
9927                   goto unknown;
9928
9929                 case 't':
9930                   if (name[3] == 'p' &&
9931                       name[4] == 'g' &&
9932                       name[5] == 'r' &&
9933                       name[6] == 'p')
9934                   {                               /* setpgrp    */
9935                     return -KEY_setpgrp;
9936                   }
9937
9938                   goto unknown;
9939
9940                 default:
9941                   goto unknown;
9942               }
9943
9944             case 'h':
9945               if (name[2] == 'm' &&
9946                   name[3] == 'r' &&
9947                   name[4] == 'e' &&
9948                   name[5] == 'a' &&
9949                   name[6] == 'd')
9950               {                                   /* shmread    */
9951                 return -KEY_shmread;
9952               }
9953
9954               goto unknown;
9955
9956             case 'p':
9957               if (name[2] == 'r' &&
9958                   name[3] == 'i' &&
9959                   name[4] == 'n' &&
9960                   name[5] == 't' &&
9961                   name[6] == 'f')
9962               {                                   /* sprintf    */
9963                 return -KEY_sprintf;
9964               }
9965
9966               goto unknown;
9967
9968             case 'y':
9969               switch (name[2])
9970               {
9971                 case 'm':
9972                   if (name[3] == 'l' &&
9973                       name[4] == 'i' &&
9974                       name[5] == 'n' &&
9975                       name[6] == 'k')
9976                   {                               /* symlink    */
9977                     return -KEY_symlink;
9978                   }
9979
9980                   goto unknown;
9981
9982                 case 's':
9983                   switch (name[3])
9984                   {
9985                     case 'c':
9986                       if (name[4] == 'a' &&
9987                           name[5] == 'l' &&
9988                           name[6] == 'l')
9989                       {                           /* syscall    */
9990                         return -KEY_syscall;
9991                       }
9992
9993                       goto unknown;
9994
9995                     case 'o':
9996                       if (name[4] == 'p' &&
9997                           name[5] == 'e' &&
9998                           name[6] == 'n')
9999                       {                           /* sysopen    */
10000                         return -KEY_sysopen;
10001                       }
10002
10003                       goto unknown;
10004
10005                     case 'r':
10006                       if (name[4] == 'e' &&
10007                           name[5] == 'a' &&
10008                           name[6] == 'd')
10009                       {                           /* sysread    */
10010                         return -KEY_sysread;
10011                       }
10012
10013                       goto unknown;
10014
10015                     case 's':
10016                       if (name[4] == 'e' &&
10017                           name[5] == 'e' &&
10018                           name[6] == 'k')
10019                       {                           /* sysseek    */
10020                         return -KEY_sysseek;
10021                       }
10022
10023                       goto unknown;
10024
10025                     default:
10026                       goto unknown;
10027                   }
10028
10029                 default:
10030                   goto unknown;
10031               }
10032
10033             default:
10034               goto unknown;
10035           }
10036
10037         case 't':
10038           if (name[1] == 'e' &&
10039               name[2] == 'l' &&
10040               name[3] == 'l' &&
10041               name[4] == 'd' &&
10042               name[5] == 'i' &&
10043               name[6] == 'r')
10044           {                                       /* telldir    */
10045             return -KEY_telldir;
10046           }
10047
10048           goto unknown;
10049
10050         case 'u':
10051           switch (name[1])
10052           {
10053             case 'c':
10054               if (name[2] == 'f' &&
10055                   name[3] == 'i' &&
10056                   name[4] == 'r' &&
10057                   name[5] == 's' &&
10058                   name[6] == 't')
10059               {                                   /* ucfirst    */
10060                 return -KEY_ucfirst;
10061               }
10062
10063               goto unknown;
10064
10065             case 'n':
10066               if (name[2] == 's' &&
10067                   name[3] == 'h' &&
10068                   name[4] == 'i' &&
10069                   name[5] == 'f' &&
10070                   name[6] == 't')
10071               {                                   /* unshift    */
10072                 return -KEY_unshift;
10073               }
10074
10075               goto unknown;
10076
10077             default:
10078               goto unknown;
10079           }
10080
10081         case 'w':
10082           if (name[1] == 'a' &&
10083               name[2] == 'i' &&
10084               name[3] == 't' &&
10085               name[4] == 'p' &&
10086               name[5] == 'i' &&
10087               name[6] == 'd')
10088           {                                       /* waitpid    */
10089             return -KEY_waitpid;
10090           }
10091
10092           goto unknown;
10093
10094         default:
10095           goto unknown;
10096       }
10097
10098     case 8: /* 26 tokens of length 8 */
10099       switch (name[0])
10100       {
10101         case 'A':
10102           if (name[1] == 'U' &&
10103               name[2] == 'T' &&
10104               name[3] == 'O' &&
10105               name[4] == 'L' &&
10106               name[5] == 'O' &&
10107               name[6] == 'A' &&
10108               name[7] == 'D')
10109           {                                       /* AUTOLOAD   */
10110             return KEY_AUTOLOAD;
10111           }
10112
10113           goto unknown;
10114
10115         case '_':
10116           if (name[1] == '_')
10117           {
10118             switch (name[2])
10119             {
10120               case 'D':
10121                 if (name[3] == 'A' &&
10122                     name[4] == 'T' &&
10123                     name[5] == 'A' &&
10124                     name[6] == '_' &&
10125                     name[7] == '_')
10126                 {                                 /* __DATA__   */
10127                   return KEY___DATA__;
10128                 }
10129
10130                 goto unknown;
10131
10132               case 'F':
10133                 if (name[3] == 'I' &&
10134                     name[4] == 'L' &&
10135                     name[5] == 'E' &&
10136                     name[6] == '_' &&
10137                     name[7] == '_')
10138                 {                                 /* __FILE__   */
10139                   return -KEY___FILE__;
10140                 }
10141
10142                 goto unknown;
10143
10144               case 'L':
10145                 if (name[3] == 'I' &&
10146                     name[4] == 'N' &&
10147                     name[5] == 'E' &&
10148                     name[6] == '_' &&
10149                     name[7] == '_')
10150                 {                                 /* __LINE__   */
10151                   return -KEY___LINE__;
10152                 }
10153
10154                 goto unknown;
10155
10156               default:
10157                 goto unknown;
10158             }
10159           }
10160
10161           goto unknown;
10162
10163         case 'c':
10164           switch (name[1])
10165           {
10166             case 'l':
10167               if (name[2] == 'o' &&
10168                   name[3] == 's' &&
10169                   name[4] == 'e' &&
10170                   name[5] == 'd' &&
10171                   name[6] == 'i' &&
10172                   name[7] == 'r')
10173               {                                   /* closedir   */
10174                 return -KEY_closedir;
10175               }
10176
10177               goto unknown;
10178
10179             case 'o':
10180               if (name[2] == 'n' &&
10181                   name[3] == 't' &&
10182                   name[4] == 'i' &&
10183                   name[5] == 'n' &&
10184                   name[6] == 'u' &&
10185                   name[7] == 'e')
10186               {                                   /* continue   */
10187                 return -KEY_continue;
10188               }
10189
10190               goto unknown;
10191
10192             default:
10193               goto unknown;
10194           }
10195
10196         case 'd':
10197           if (name[1] == 'b' &&
10198               name[2] == 'm' &&
10199               name[3] == 'c' &&
10200               name[4] == 'l' &&
10201               name[5] == 'o' &&
10202               name[6] == 's' &&
10203               name[7] == 'e')
10204           {                                       /* dbmclose   */
10205             return -KEY_dbmclose;
10206           }
10207
10208           goto unknown;
10209
10210         case 'e':
10211           if (name[1] == 'n' &&
10212               name[2] == 'd')
10213           {
10214             switch (name[3])
10215             {
10216               case 'g':
10217                 if (name[4] == 'r' &&
10218                     name[5] == 'e' &&
10219                     name[6] == 'n' &&
10220                     name[7] == 't')
10221                 {                                 /* endgrent   */
10222                   return -KEY_endgrent;
10223                 }
10224
10225                 goto unknown;
10226
10227               case 'p':
10228                 if (name[4] == 'w' &&
10229                     name[5] == 'e' &&
10230                     name[6] == 'n' &&
10231                     name[7] == 't')
10232                 {                                 /* endpwent   */
10233                   return -KEY_endpwent;
10234                 }
10235
10236                 goto unknown;
10237
10238               default:
10239                 goto unknown;
10240             }
10241           }
10242
10243           goto unknown;
10244
10245         case 'f':
10246           if (name[1] == 'o' &&
10247               name[2] == 'r' &&
10248               name[3] == 'm' &&
10249               name[4] == 'l' &&
10250               name[5] == 'i' &&
10251               name[6] == 'n' &&
10252               name[7] == 'e')
10253           {                                       /* formline   */
10254             return -KEY_formline;
10255           }
10256
10257           goto unknown;
10258
10259         case 'g':
10260           if (name[1] == 'e' &&
10261               name[2] == 't')
10262           {
10263             switch (name[3])
10264             {
10265               case 'g':
10266                 if (name[4] == 'r')
10267                 {
10268                   switch (name[5])
10269                   {
10270                     case 'e':
10271                       if (name[6] == 'n' &&
10272                           name[7] == 't')
10273                       {                           /* getgrent   */
10274                         return -KEY_getgrent;
10275                       }
10276
10277                       goto unknown;
10278
10279                     case 'g':
10280                       if (name[6] == 'i' &&
10281                           name[7] == 'd')
10282                       {                           /* getgrgid   */
10283                         return -KEY_getgrgid;
10284                       }
10285
10286                       goto unknown;
10287
10288                     case 'n':
10289                       if (name[6] == 'a' &&
10290                           name[7] == 'm')
10291                       {                           /* getgrnam   */
10292                         return -KEY_getgrnam;
10293                       }
10294
10295                       goto unknown;
10296
10297                     default:
10298                       goto unknown;
10299                   }
10300                 }
10301
10302                 goto unknown;
10303
10304               case 'l':
10305                 if (name[4] == 'o' &&
10306                     name[5] == 'g' &&
10307                     name[6] == 'i' &&
10308                     name[7] == 'n')
10309                 {                                 /* getlogin   */
10310                   return -KEY_getlogin;
10311                 }
10312
10313                 goto unknown;
10314
10315               case 'p':
10316                 if (name[4] == 'w')
10317                 {
10318                   switch (name[5])
10319                   {
10320                     case 'e':
10321                       if (name[6] == 'n' &&
10322                           name[7] == 't')
10323                       {                           /* getpwent   */
10324                         return -KEY_getpwent;
10325                       }
10326
10327                       goto unknown;
10328
10329                     case 'n':
10330                       if (name[6] == 'a' &&
10331                           name[7] == 'm')
10332                       {                           /* getpwnam   */
10333                         return -KEY_getpwnam;
10334                       }
10335
10336                       goto unknown;
10337
10338                     case 'u':
10339                       if (name[6] == 'i' &&
10340                           name[7] == 'd')
10341                       {                           /* getpwuid   */
10342                         return -KEY_getpwuid;
10343                       }
10344
10345                       goto unknown;
10346
10347                     default:
10348                       goto unknown;
10349                   }
10350                 }
10351
10352                 goto unknown;
10353
10354               default:
10355                 goto unknown;
10356             }
10357           }
10358
10359           goto unknown;
10360
10361         case 'r':
10362           if (name[1] == 'e' &&
10363               name[2] == 'a' &&
10364               name[3] == 'd')
10365           {
10366             switch (name[4])
10367             {
10368               case 'l':
10369                 if (name[5] == 'i' &&
10370                     name[6] == 'n')
10371                 {
10372                   switch (name[7])
10373                   {
10374                     case 'e':
10375                       {                           /* readline   */
10376                         return -KEY_readline;
10377                       }
10378
10379                     case 'k':
10380                       {                           /* readlink   */
10381                         return -KEY_readlink;
10382                       }
10383
10384                     default:
10385                       goto unknown;
10386                   }
10387                 }
10388
10389                 goto unknown;
10390
10391               case 'p':
10392                 if (name[5] == 'i' &&
10393                     name[6] == 'p' &&
10394                     name[7] == 'e')
10395                 {                                 /* readpipe   */
10396                   return -KEY_readpipe;
10397                 }
10398
10399                 goto unknown;
10400
10401               default:
10402                 goto unknown;
10403             }
10404           }
10405
10406           goto unknown;
10407
10408         case 's':
10409           switch (name[1])
10410           {
10411             case 'e':
10412               if (name[2] == 't')
10413               {
10414                 switch (name[3])
10415                 {
10416                   case 'g':
10417                     if (name[4] == 'r' &&
10418                         name[5] == 'e' &&
10419                         name[6] == 'n' &&
10420                         name[7] == 't')
10421                     {                             /* setgrent   */
10422                       return -KEY_setgrent;
10423                     }
10424
10425                     goto unknown;
10426
10427                   case 'p':
10428                     if (name[4] == 'w' &&
10429                         name[5] == 'e' &&
10430                         name[6] == 'n' &&
10431                         name[7] == 't')
10432                     {                             /* setpwent   */
10433                       return -KEY_setpwent;
10434                     }
10435
10436                     goto unknown;
10437
10438                   default:
10439                     goto unknown;
10440                 }
10441               }
10442
10443               goto unknown;
10444
10445             case 'h':
10446               switch (name[2])
10447               {
10448                 case 'm':
10449                   if (name[3] == 'w' &&
10450                       name[4] == 'r' &&
10451                       name[5] == 'i' &&
10452                       name[6] == 't' &&
10453                       name[7] == 'e')
10454                   {                               /* shmwrite   */
10455                     return -KEY_shmwrite;
10456                   }
10457
10458                   goto unknown;
10459
10460                 case 'u':
10461                   if (name[3] == 't' &&
10462                       name[4] == 'd' &&
10463                       name[5] == 'o' &&
10464                       name[6] == 'w' &&
10465                       name[7] == 'n')
10466                   {                               /* shutdown   */
10467                     return -KEY_shutdown;
10468                   }
10469
10470                   goto unknown;
10471
10472                 default:
10473                   goto unknown;
10474               }
10475
10476             case 'y':
10477               if (name[2] == 's' &&
10478                   name[3] == 'w' &&
10479                   name[4] == 'r' &&
10480                   name[5] == 'i' &&
10481                   name[6] == 't' &&
10482                   name[7] == 'e')
10483               {                                   /* syswrite   */
10484                 return -KEY_syswrite;
10485               }
10486
10487               goto unknown;
10488
10489             default:
10490               goto unknown;
10491           }
10492
10493         case 't':
10494           if (name[1] == 'r' &&
10495               name[2] == 'u' &&
10496               name[3] == 'n' &&
10497               name[4] == 'c' &&
10498               name[5] == 'a' &&
10499               name[6] == 't' &&
10500               name[7] == 'e')
10501           {                                       /* truncate   */
10502             return -KEY_truncate;
10503           }
10504
10505           goto unknown;
10506
10507         default:
10508           goto unknown;
10509       }
10510
10511     case 9: /* 9 tokens of length 9 */
10512       switch (name[0])
10513       {
10514         case 'U':
10515           if (name[1] == 'N' &&
10516               name[2] == 'I' &&
10517               name[3] == 'T' &&
10518               name[4] == 'C' &&
10519               name[5] == 'H' &&
10520               name[6] == 'E' &&
10521               name[7] == 'C' &&
10522               name[8] == 'K')
10523           {                                       /* UNITCHECK  */
10524             return KEY_UNITCHECK;
10525           }
10526
10527           goto unknown;
10528
10529         case 'e':
10530           if (name[1] == 'n' &&
10531               name[2] == 'd' &&
10532               name[3] == 'n' &&
10533               name[4] == 'e' &&
10534               name[5] == 't' &&
10535               name[6] == 'e' &&
10536               name[7] == 'n' &&
10537               name[8] == 't')
10538           {                                       /* endnetent  */
10539             return -KEY_endnetent;
10540           }
10541
10542           goto unknown;
10543
10544         case 'g':
10545           if (name[1] == 'e' &&
10546               name[2] == 't' &&
10547               name[3] == 'n' &&
10548               name[4] == 'e' &&
10549               name[5] == 't' &&
10550               name[6] == 'e' &&
10551               name[7] == 'n' &&
10552               name[8] == 't')
10553           {                                       /* getnetent  */
10554             return -KEY_getnetent;
10555           }
10556
10557           goto unknown;
10558
10559         case 'l':
10560           if (name[1] == 'o' &&
10561               name[2] == 'c' &&
10562               name[3] == 'a' &&
10563               name[4] == 'l' &&
10564               name[5] == 't' &&
10565               name[6] == 'i' &&
10566               name[7] == 'm' &&
10567               name[8] == 'e')
10568           {                                       /* localtime  */
10569             return -KEY_localtime;
10570           }
10571
10572           goto unknown;
10573
10574         case 'p':
10575           if (name[1] == 'r' &&
10576               name[2] == 'o' &&
10577               name[3] == 't' &&
10578               name[4] == 'o' &&
10579               name[5] == 't' &&
10580               name[6] == 'y' &&
10581               name[7] == 'p' &&
10582               name[8] == 'e')
10583           {                                       /* prototype  */
10584             return KEY_prototype;
10585           }
10586
10587           goto unknown;
10588
10589         case 'q':
10590           if (name[1] == 'u' &&
10591               name[2] == 'o' &&
10592               name[3] == 't' &&
10593               name[4] == 'e' &&
10594               name[5] == 'm' &&
10595               name[6] == 'e' &&
10596               name[7] == 't' &&
10597               name[8] == 'a')
10598           {                                       /* quotemeta  */
10599             return -KEY_quotemeta;
10600           }
10601
10602           goto unknown;
10603
10604         case 'r':
10605           if (name[1] == 'e' &&
10606               name[2] == 'w' &&
10607               name[3] == 'i' &&
10608               name[4] == 'n' &&
10609               name[5] == 'd' &&
10610               name[6] == 'd' &&
10611               name[7] == 'i' &&
10612               name[8] == 'r')
10613           {                                       /* rewinddir  */
10614             return -KEY_rewinddir;
10615           }
10616
10617           goto unknown;
10618
10619         case 's':
10620           if (name[1] == 'e' &&
10621               name[2] == 't' &&
10622               name[3] == 'n' &&
10623               name[4] == 'e' &&
10624               name[5] == 't' &&
10625               name[6] == 'e' &&
10626               name[7] == 'n' &&
10627               name[8] == 't')
10628           {                                       /* setnetent  */
10629             return -KEY_setnetent;
10630           }
10631
10632           goto unknown;
10633
10634         case 'w':
10635           if (name[1] == 'a' &&
10636               name[2] == 'n' &&
10637               name[3] == 't' &&
10638               name[4] == 'a' &&
10639               name[5] == 'r' &&
10640               name[6] == 'r' &&
10641               name[7] == 'a' &&
10642               name[8] == 'y')
10643           {                                       /* wantarray  */
10644             return -KEY_wantarray;
10645           }
10646
10647           goto unknown;
10648
10649         default:
10650           goto unknown;
10651       }
10652
10653     case 10: /* 9 tokens of length 10 */
10654       switch (name[0])
10655       {
10656         case 'e':
10657           if (name[1] == 'n' &&
10658               name[2] == 'd')
10659           {
10660             switch (name[3])
10661             {
10662               case 'h':
10663                 if (name[4] == 'o' &&
10664                     name[5] == 's' &&
10665                     name[6] == 't' &&
10666                     name[7] == 'e' &&
10667                     name[8] == 'n' &&
10668                     name[9] == 't')
10669                 {                                 /* endhostent */
10670                   return -KEY_endhostent;
10671                 }
10672
10673                 goto unknown;
10674
10675               case 's':
10676                 if (name[4] == 'e' &&
10677                     name[5] == 'r' &&
10678                     name[6] == 'v' &&
10679                     name[7] == 'e' &&
10680                     name[8] == 'n' &&
10681                     name[9] == 't')
10682                 {                                 /* endservent */
10683                   return -KEY_endservent;
10684                 }
10685
10686                 goto unknown;
10687
10688               default:
10689                 goto unknown;
10690             }
10691           }
10692
10693           goto unknown;
10694
10695         case 'g':
10696           if (name[1] == 'e' &&
10697               name[2] == 't')
10698           {
10699             switch (name[3])
10700             {
10701               case 'h':
10702                 if (name[4] == 'o' &&
10703                     name[5] == 's' &&
10704                     name[6] == 't' &&
10705                     name[7] == 'e' &&
10706                     name[8] == 'n' &&
10707                     name[9] == 't')
10708                 {                                 /* gethostent */
10709                   return -KEY_gethostent;
10710                 }
10711
10712                 goto unknown;
10713
10714               case 's':
10715                 switch (name[4])
10716                 {
10717                   case 'e':
10718                     if (name[5] == 'r' &&
10719                         name[6] == 'v' &&
10720                         name[7] == 'e' &&
10721                         name[8] == 'n' &&
10722                         name[9] == 't')
10723                     {                             /* getservent */
10724                       return -KEY_getservent;
10725                     }
10726
10727                     goto unknown;
10728
10729                   case 'o':
10730                     if (name[5] == 'c' &&
10731                         name[6] == 'k' &&
10732                         name[7] == 'o' &&
10733                         name[8] == 'p' &&
10734                         name[9] == 't')
10735                     {                             /* getsockopt */
10736                       return -KEY_getsockopt;
10737                     }
10738
10739                     goto unknown;
10740
10741                   default:
10742                     goto unknown;
10743                 }
10744
10745               default:
10746                 goto unknown;
10747             }
10748           }
10749
10750           goto unknown;
10751
10752         case 's':
10753           switch (name[1])
10754           {
10755             case 'e':
10756               if (name[2] == 't')
10757               {
10758                 switch (name[3])
10759                 {
10760                   case 'h':
10761                     if (name[4] == 'o' &&
10762                         name[5] == 's' &&
10763                         name[6] == 't' &&
10764                         name[7] == 'e' &&
10765                         name[8] == 'n' &&
10766                         name[9] == 't')
10767                     {                             /* sethostent */
10768                       return -KEY_sethostent;
10769                     }
10770
10771                     goto unknown;
10772
10773                   case 's':
10774                     switch (name[4])
10775                     {
10776                       case 'e':
10777                         if (name[5] == 'r' &&
10778                             name[6] == 'v' &&
10779                             name[7] == 'e' &&
10780                             name[8] == 'n' &&
10781                             name[9] == 't')
10782                         {                         /* setservent */
10783                           return -KEY_setservent;
10784                         }
10785
10786                         goto unknown;
10787
10788                       case 'o':
10789                         if (name[5] == 'c' &&
10790                             name[6] == 'k' &&
10791                             name[7] == 'o' &&
10792                             name[8] == 'p' &&
10793                             name[9] == 't')
10794                         {                         /* setsockopt */
10795                           return -KEY_setsockopt;
10796                         }
10797
10798                         goto unknown;
10799
10800                       default:
10801                         goto unknown;
10802                     }
10803
10804                   default:
10805                     goto unknown;
10806                 }
10807               }
10808
10809               goto unknown;
10810
10811             case 'o':
10812               if (name[2] == 'c' &&
10813                   name[3] == 'k' &&
10814                   name[4] == 'e' &&
10815                   name[5] == 't' &&
10816                   name[6] == 'p' &&
10817                   name[7] == 'a' &&
10818                   name[8] == 'i' &&
10819                   name[9] == 'r')
10820               {                                   /* socketpair */
10821                 return -KEY_socketpair;
10822               }
10823
10824               goto unknown;
10825
10826             default:
10827               goto unknown;
10828           }
10829
10830         default:
10831           goto unknown;
10832       }
10833
10834     case 11: /* 8 tokens of length 11 */
10835       switch (name[0])
10836       {
10837         case '_':
10838           if (name[1] == '_' &&
10839               name[2] == 'P' &&
10840               name[3] == 'A' &&
10841               name[4] == 'C' &&
10842               name[5] == 'K' &&
10843               name[6] == 'A' &&
10844               name[7] == 'G' &&
10845               name[8] == 'E' &&
10846               name[9] == '_' &&
10847               name[10] == '_')
10848           {                                       /* __PACKAGE__ */
10849             return -KEY___PACKAGE__;
10850           }
10851
10852           goto unknown;
10853
10854         case 'e':
10855           if (name[1] == 'n' &&
10856               name[2] == 'd' &&
10857               name[3] == 'p' &&
10858               name[4] == 'r' &&
10859               name[5] == 'o' &&
10860               name[6] == 't' &&
10861               name[7] == 'o' &&
10862               name[8] == 'e' &&
10863               name[9] == 'n' &&
10864               name[10] == 't')
10865           {                                       /* endprotoent */
10866             return -KEY_endprotoent;
10867           }
10868
10869           goto unknown;
10870
10871         case 'g':
10872           if (name[1] == 'e' &&
10873               name[2] == 't')
10874           {
10875             switch (name[3])
10876             {
10877               case 'p':
10878                 switch (name[4])
10879                 {
10880                   case 'e':
10881                     if (name[5] == 'e' &&
10882                         name[6] == 'r' &&
10883                         name[7] == 'n' &&
10884                         name[8] == 'a' &&
10885                         name[9] == 'm' &&
10886                         name[10] == 'e')
10887                     {                             /* getpeername */
10888                       return -KEY_getpeername;
10889                     }
10890
10891                     goto unknown;
10892
10893                   case 'r':
10894                     switch (name[5])
10895                     {
10896                       case 'i':
10897                         if (name[6] == 'o' &&
10898                             name[7] == 'r' &&
10899                             name[8] == 'i' &&
10900                             name[9] == 't' &&
10901                             name[10] == 'y')
10902                         {                         /* getpriority */
10903                           return -KEY_getpriority;
10904                         }
10905
10906                         goto unknown;
10907
10908                       case 'o':
10909                         if (name[6] == 't' &&
10910                             name[7] == 'o' &&
10911                             name[8] == 'e' &&
10912                             name[9] == 'n' &&
10913                             name[10] == 't')
10914                         {                         /* getprotoent */
10915                           return -KEY_getprotoent;
10916                         }
10917
10918                         goto unknown;
10919
10920                       default:
10921                         goto unknown;
10922                     }
10923
10924                   default:
10925                     goto unknown;
10926                 }
10927
10928               case 's':
10929                 if (name[4] == 'o' &&
10930                     name[5] == 'c' &&
10931                     name[6] == 'k' &&
10932                     name[7] == 'n' &&
10933                     name[8] == 'a' &&
10934                     name[9] == 'm' &&
10935                     name[10] == 'e')
10936                 {                                 /* getsockname */
10937                   return -KEY_getsockname;
10938                 }
10939
10940                 goto unknown;
10941
10942               default:
10943                 goto unknown;
10944             }
10945           }
10946
10947           goto unknown;
10948
10949         case 's':
10950           if (name[1] == 'e' &&
10951               name[2] == 't' &&
10952               name[3] == 'p' &&
10953               name[4] == 'r')
10954           {
10955             switch (name[5])
10956             {
10957               case 'i':
10958                 if (name[6] == 'o' &&
10959                     name[7] == 'r' &&
10960                     name[8] == 'i' &&
10961                     name[9] == 't' &&
10962                     name[10] == 'y')
10963                 {                                 /* setpriority */
10964                   return -KEY_setpriority;
10965                 }
10966
10967                 goto unknown;
10968
10969               case 'o':
10970                 if (name[6] == 't' &&
10971                     name[7] == 'o' &&
10972                     name[8] == 'e' &&
10973                     name[9] == 'n' &&
10974                     name[10] == 't')
10975                 {                                 /* setprotoent */
10976                   return -KEY_setprotoent;
10977                 }
10978
10979                 goto unknown;
10980
10981               default:
10982                 goto unknown;
10983             }
10984           }
10985
10986           goto unknown;
10987
10988         default:
10989           goto unknown;
10990       }
10991
10992     case 12: /* 2 tokens of length 12 */
10993       if (name[0] == 'g' &&
10994           name[1] == 'e' &&
10995           name[2] == 't' &&
10996           name[3] == 'n' &&
10997           name[4] == 'e' &&
10998           name[5] == 't' &&
10999           name[6] == 'b' &&
11000           name[7] == 'y')
11001       {
11002         switch (name[8])
11003         {
11004           case 'a':
11005             if (name[9] == 'd' &&
11006                 name[10] == 'd' &&
11007                 name[11] == 'r')
11008             {                                     /* getnetbyaddr */
11009               return -KEY_getnetbyaddr;
11010             }
11011
11012             goto unknown;
11013
11014           case 'n':
11015             if (name[9] == 'a' &&
11016                 name[10] == 'm' &&
11017                 name[11] == 'e')
11018             {                                     /* getnetbyname */
11019               return -KEY_getnetbyname;
11020             }
11021
11022             goto unknown;
11023
11024           default:
11025             goto unknown;
11026         }
11027       }
11028
11029       goto unknown;
11030
11031     case 13: /* 4 tokens of length 13 */
11032       if (name[0] == 'g' &&
11033           name[1] == 'e' &&
11034           name[2] == 't')
11035       {
11036         switch (name[3])
11037         {
11038           case 'h':
11039             if (name[4] == 'o' &&
11040                 name[5] == 's' &&
11041                 name[6] == 't' &&
11042                 name[7] == 'b' &&
11043                 name[8] == 'y')
11044             {
11045               switch (name[9])
11046               {
11047                 case 'a':
11048                   if (name[10] == 'd' &&
11049                       name[11] == 'd' &&
11050                       name[12] == 'r')
11051                   {                               /* gethostbyaddr */
11052                     return -KEY_gethostbyaddr;
11053                   }
11054
11055                   goto unknown;
11056
11057                 case 'n':
11058                   if (name[10] == 'a' &&
11059                       name[11] == 'm' &&
11060                       name[12] == 'e')
11061                   {                               /* gethostbyname */
11062                     return -KEY_gethostbyname;
11063                   }
11064
11065                   goto unknown;
11066
11067                 default:
11068                   goto unknown;
11069               }
11070             }
11071
11072             goto unknown;
11073
11074           case 's':
11075             if (name[4] == 'e' &&
11076                 name[5] == 'r' &&
11077                 name[6] == 'v' &&
11078                 name[7] == 'b' &&
11079                 name[8] == 'y')
11080             {
11081               switch (name[9])
11082               {
11083                 case 'n':
11084                   if (name[10] == 'a' &&
11085                       name[11] == 'm' &&
11086                       name[12] == 'e')
11087                   {                               /* getservbyname */
11088                     return -KEY_getservbyname;
11089                   }
11090
11091                   goto unknown;
11092
11093                 case 'p':
11094                   if (name[10] == 'o' &&
11095                       name[11] == 'r' &&
11096                       name[12] == 't')
11097                   {                               /* getservbyport */
11098                     return -KEY_getservbyport;
11099                   }
11100
11101                   goto unknown;
11102
11103                 default:
11104                   goto unknown;
11105               }
11106             }
11107
11108             goto unknown;
11109
11110           default:
11111             goto unknown;
11112         }
11113       }
11114
11115       goto unknown;
11116
11117     case 14: /* 1 tokens of length 14 */
11118       if (name[0] == 'g' &&
11119           name[1] == 'e' &&
11120           name[2] == 't' &&
11121           name[3] == 'p' &&
11122           name[4] == 'r' &&
11123           name[5] == 'o' &&
11124           name[6] == 't' &&
11125           name[7] == 'o' &&
11126           name[8] == 'b' &&
11127           name[9] == 'y' &&
11128           name[10] == 'n' &&
11129           name[11] == 'a' &&
11130           name[12] == 'm' &&
11131           name[13] == 'e')
11132       {                                           /* getprotobyname */
11133         return -KEY_getprotobyname;
11134       }
11135
11136       goto unknown;
11137
11138     case 16: /* 1 tokens of length 16 */
11139       if (name[0] == 'g' &&
11140           name[1] == 'e' &&
11141           name[2] == 't' &&
11142           name[3] == 'p' &&
11143           name[4] == 'r' &&
11144           name[5] == 'o' &&
11145           name[6] == 't' &&
11146           name[7] == 'o' &&
11147           name[8] == 'b' &&
11148           name[9] == 'y' &&
11149           name[10] == 'n' &&
11150           name[11] == 'u' &&
11151           name[12] == 'm' &&
11152           name[13] == 'b' &&
11153           name[14] == 'e' &&
11154           name[15] == 'r')
11155       {                                           /* getprotobynumber */
11156         return -KEY_getprotobynumber;
11157       }
11158
11159       goto unknown;
11160
11161     default:
11162       goto unknown;
11163   }
11164
11165 unknown:
11166   return 0;
11167 }
11168
11169 STATIC void
11170 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
11171 {
11172     dVAR;
11173
11174     PERL_ARGS_ASSERT_CHECKCOMMA;
11175
11176     if (*s == ' ' && s[1] == '(') {     /* XXX gotta be a better way */
11177         if (ckWARN(WARN_SYNTAX)) {
11178             int level = 1;
11179             const char *w;
11180             for (w = s+2; *w && level; w++) {
11181                 if (*w == '(')
11182                     ++level;
11183                 else if (*w == ')')
11184                     --level;
11185             }
11186             while (isSPACE(*w))
11187                 ++w;
11188             /* the list of chars below is for end of statements or
11189              * block / parens, boolean operators (&&, ||, //) and branch
11190              * constructs (or, and, if, until, unless, while, err, for).
11191              * Not a very solid hack... */
11192             if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
11193                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
11194                             "%s (...) interpreted as function",name);
11195         }
11196     }
11197     while (s < PL_bufend && isSPACE(*s))
11198         s++;
11199     if (*s == '(')
11200         s++;
11201     while (s < PL_bufend && isSPACE(*s))
11202         s++;
11203     if (isIDFIRST_lazy_if(s,UTF)) {
11204         const char * const w = s++;
11205         while (isALNUM_lazy_if(s,UTF))
11206             s++;
11207         while (s < PL_bufend && isSPACE(*s))
11208             s++;
11209         if (*s == ',') {
11210             GV* gv;
11211             if (keyword(w, s - w, 0))
11212                 return;
11213
11214             gv = gv_fetchpvn_flags(w, s - w, 0, SVt_PVCV);
11215             if (gv && GvCVu(gv))
11216                 return;
11217             Perl_croak(aTHX_ "No comma allowed after %s", what);
11218         }
11219     }
11220 }
11221
11222 /* Either returns sv, or mortalizes sv and returns a new SV*.
11223    Best used as sv=new_constant(..., sv, ...).
11224    If s, pv are NULL, calls subroutine with one argument,
11225    and type is used with error messages only. */
11226
11227 STATIC SV *
11228 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
11229                SV *sv, SV *pv, const char *type, STRLEN typelen)
11230 {
11231     dVAR; dSP;
11232     HV * const table = GvHV(PL_hintgv);          /* ^H */
11233     SV *res;
11234     SV **cvp;
11235     SV *cv, *typesv;
11236     const char *why1 = "", *why2 = "", *why3 = "";
11237
11238     PERL_ARGS_ASSERT_NEW_CONSTANT;
11239
11240     if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
11241         SV *msg;
11242
11243         why2 = (const char *)
11244             (strEQ(key,"charnames")
11245              ? "(possibly a missing \"use charnames ...\")"
11246              : "");
11247         msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
11248                             (type ? type: "undef"), why2);
11249
11250         /* This is convoluted and evil ("goto considered harmful")
11251          * but I do not understand the intricacies of all the different
11252          * failure modes of %^H in here.  The goal here is to make
11253          * the most probable error message user-friendly. --jhi */
11254
11255         goto msgdone;
11256
11257     report:
11258         msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
11259                             (type ? type: "undef"), why1, why2, why3);
11260     msgdone:
11261         yyerror(SvPVX_const(msg));
11262         SvREFCNT_dec(msg);
11263         return sv;
11264     }
11265     cvp = hv_fetch(table, key, keylen, FALSE);
11266     if (!cvp || !SvOK(*cvp)) {
11267         why1 = "$^H{";
11268         why2 = key;
11269         why3 = "} is not defined";
11270         goto report;
11271     }
11272     sv_2mortal(sv);                     /* Parent created it permanently */
11273     cv = *cvp;
11274     if (!pv && s)
11275         pv = newSVpvn_flags(s, len, SVs_TEMP);
11276     if (type && pv)
11277         typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
11278     else
11279         typesv = &PL_sv_undef;
11280
11281     PUSHSTACKi(PERLSI_OVERLOAD);
11282     ENTER ;
11283     SAVETMPS;
11284
11285     PUSHMARK(SP) ;
11286     EXTEND(sp, 3);
11287     if (pv)
11288         PUSHs(pv);
11289     PUSHs(sv);
11290     if (pv)
11291         PUSHs(typesv);
11292     PUTBACK;
11293     call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
11294
11295     SPAGAIN ;
11296
11297     /* Check the eval first */
11298     if (!PL_in_eval && SvTRUE(ERRSV)) {
11299         sv_catpvs(ERRSV, "Propagated");
11300         yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
11301         (void)POPs;
11302         res = SvREFCNT_inc_simple(sv);
11303     }
11304     else {
11305         res = POPs;
11306         SvREFCNT_inc_simple_void(res);
11307     }
11308
11309     PUTBACK ;
11310     FREETMPS ;
11311     LEAVE ;
11312     POPSTACK;
11313
11314     if (!SvOK(res)) {
11315         why1 = "Call to &{$^H{";
11316         why2 = key;
11317         why3 = "}} did not return a defined value";
11318         sv = res;
11319         goto report;
11320     }
11321
11322     return res;
11323 }
11324
11325 /* Returns a NUL terminated string, with the length of the string written to
11326    *slp
11327    */
11328 STATIC char *
11329 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
11330 {
11331     dVAR;
11332     register char *d = dest;
11333     register char * const e = d + destlen - 3;  /* two-character token, ending NUL */
11334
11335     PERL_ARGS_ASSERT_SCAN_WORD;
11336
11337     for (;;) {
11338         if (d >= e)
11339             Perl_croak(aTHX_ ident_too_long);
11340         if (isALNUM(*s))        /* UTF handled below */
11341             *d++ = *s++;
11342         else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
11343             *d++ = ':';
11344             *d++ = ':';
11345             s++;
11346         }
11347         else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
11348             *d++ = *s++;
11349             *d++ = *s++;
11350         }
11351         else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
11352             char *t = s + UTF8SKIP(s);
11353             size_t len;
11354             while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
11355                 t += UTF8SKIP(t);
11356             len = t - s;
11357             if (d + len > e)
11358                 Perl_croak(aTHX_ ident_too_long);
11359             Copy(s, d, len, char);
11360             d += len;
11361             s = t;
11362         }
11363         else {
11364             *d = '\0';
11365             *slp = d - dest;
11366             return s;
11367         }
11368     }
11369 }
11370
11371 STATIC char *
11372 S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
11373 {
11374     dVAR;
11375     char *bracket = NULL;
11376     char funny = *s++;
11377     register char *d = dest;
11378     register char * const e = d + destlen + 3;    /* two-character token, ending NUL */
11379
11380     PERL_ARGS_ASSERT_SCAN_IDENT;
11381
11382     if (isSPACE(*s))
11383         s = PEEKSPACE(s);
11384     if (isDIGIT(*s)) {
11385         while (isDIGIT(*s)) {
11386             if (d >= e)
11387                 Perl_croak(aTHX_ ident_too_long);
11388             *d++ = *s++;
11389         }
11390     }
11391     else {
11392         for (;;) {
11393             if (d >= e)
11394                 Perl_croak(aTHX_ ident_too_long);
11395             if (isALNUM(*s))    /* UTF handled below */
11396                 *d++ = *s++;
11397             else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
11398                 *d++ = ':';
11399                 *d++ = ':';
11400                 s++;
11401             }
11402             else if (*s == ':' && s[1] == ':') {
11403                 *d++ = *s++;
11404                 *d++ = *s++;
11405             }
11406             else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
11407                 char *t = s + UTF8SKIP(s);
11408                 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
11409                     t += UTF8SKIP(t);
11410                 if (d + (t - s) > e)
11411                     Perl_croak(aTHX_ ident_too_long);
11412                 Copy(s, d, t - s, char);
11413                 d += t - s;
11414                 s = t;
11415             }
11416             else
11417                 break;
11418         }
11419     }
11420     *d = '\0';
11421     d = dest;
11422     if (*d) {
11423         if (PL_lex_state != LEX_NORMAL)
11424             PL_lex_state = LEX_INTERPENDMAYBE;
11425         return s;
11426     }
11427     if (*s == '$' && s[1] &&
11428         (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
11429     {
11430         return s;
11431     }
11432     if (*s == '{') {
11433         bracket = s;
11434         s++;
11435     }
11436     else if (ck_uni)
11437         check_uni();
11438     if (s < send)
11439         *d = *s++;
11440     d[1] = '\0';
11441     if (*d == '^' && *s && isCONTROLVAR(*s)) {
11442         *d = toCTRL(*s);
11443         s++;
11444     }
11445     if (bracket) {
11446         if (isSPACE(s[-1])) {
11447             while (s < send) {
11448                 const char ch = *s++;
11449                 if (!SPACE_OR_TAB(ch)) {
11450                     *d = ch;
11451                     break;
11452                 }
11453             }
11454         }
11455         if (isIDFIRST_lazy_if(d,UTF)) {
11456             d++;
11457             if (UTF) {
11458                 char *end = s;
11459                 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
11460                     end += UTF8SKIP(end);
11461                     while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
11462                         end += UTF8SKIP(end);
11463                 }
11464                 Copy(s, d, end - s, char);
11465                 d += end - s;
11466                 s = end;
11467             }
11468             else {
11469                 while ((isALNUM(*s) || *s == ':') && d < e)
11470                     *d++ = *s++;
11471                 if (d >= e)
11472                     Perl_croak(aTHX_ ident_too_long);
11473             }
11474             *d = '\0';
11475             while (s < send && SPACE_OR_TAB(*s))
11476                 s++;
11477             if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
11478                 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
11479                     const char * const brack =
11480                         (const char *)
11481                         ((*s == '[') ? "[...]" : "{...}");
11482                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
11483                         "Ambiguous use of %c{%s%s} resolved to %c%s%s",
11484                         funny, dest, brack, funny, dest, brack);
11485                 }
11486                 bracket++;
11487                 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
11488                 return s;
11489             }
11490         }
11491         /* Handle extended ${^Foo} variables
11492          * 1999-02-27 mjd-perl-patch@plover.com */
11493         else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
11494                  && isALNUM(*s))
11495         {
11496             d++;
11497             while (isALNUM(*s) && d < e) {
11498                 *d++ = *s++;
11499             }
11500             if (d >= e)
11501                 Perl_croak(aTHX_ ident_too_long);
11502             *d = '\0';
11503         }
11504         if (*s == '}') {
11505             s++;
11506             if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
11507                 PL_lex_state = LEX_INTERPEND;
11508                 PL_expect = XREF;
11509             }
11510             if (PL_lex_state == LEX_NORMAL) {
11511                 if (ckWARN(WARN_AMBIGUOUS) &&
11512                     (keyword(dest, d - dest, 0)
11513                      || get_cvn_flags(dest, d - dest, 0)))
11514                 {
11515                     if (funny == '#')
11516                         funny = '@';
11517                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
11518                         "Ambiguous use of %c{%s} resolved to %c%s",
11519                         funny, dest, funny, dest);
11520                 }
11521             }
11522         }
11523         else {
11524             s = bracket;                /* let the parser handle it */
11525             *dest = '\0';
11526         }
11527     }
11528     else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
11529         PL_lex_state = LEX_INTERPEND;
11530     return s;
11531 }
11532
11533 static U32
11534 S_pmflag(U32 pmfl, const char ch) {
11535     switch (ch) {
11536         CASE_STD_PMMOD_FLAGS_PARSE_SET(&pmfl);
11537     case GLOBAL_PAT_MOD:    pmfl |= PMf_GLOBAL; break;
11538     case CONTINUE_PAT_MOD:  pmfl |= PMf_CONTINUE; break;
11539     case ONCE_PAT_MOD:      pmfl |= PMf_KEEP; break;
11540     case KEEPCOPY_PAT_MOD:  pmfl |= PMf_KEEPCOPY; break;
11541     }
11542     return pmfl;
11543 }
11544
11545 void
11546 Perl_pmflag(pTHX_ U32* pmfl, int ch)
11547 {
11548     PERL_ARGS_ASSERT_PMFLAG;
11549
11550     Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED),
11551                      "Perl_pmflag() is deprecated, and will be removed from the XS API");
11552
11553     if (ch<256) {
11554         *pmfl = S_pmflag(*pmfl, (char)ch);
11555     }
11556 }
11557
11558 STATIC char *
11559 S_scan_pat(pTHX_ char *start, I32 type)
11560 {
11561     dVAR;
11562     PMOP *pm;
11563     char *s = scan_str(start,!!PL_madskills,FALSE);
11564     const char * const valid_flags =
11565         (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
11566 #ifdef PERL_MAD
11567     char *modstart;
11568 #endif
11569
11570     PERL_ARGS_ASSERT_SCAN_PAT;
11571
11572     if (!s) {
11573         const char * const delimiter = skipspace(start);
11574         Perl_croak(aTHX_
11575                    (const char *)
11576                    (*delimiter == '?'
11577                     ? "Search pattern not terminated or ternary operator parsed as search pattern"
11578                     : "Search pattern not terminated" ));
11579     }
11580
11581     pm = (PMOP*)newPMOP(type, 0);
11582     if (PL_multi_open == '?') {
11583         /* This is the only point in the code that sets PMf_ONCE:  */
11584         pm->op_pmflags |= PMf_ONCE;
11585
11586         /* Hence it's safe to do this bit of PMOP book-keeping here, which
11587            allows us to restrict the list needed by reset to just the ??
11588            matches.  */
11589         assert(type != OP_TRANS);
11590         if (PL_curstash) {
11591             MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
11592             U32 elements;
11593             if (!mg) {
11594                 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
11595                                  0);
11596             }
11597             elements = mg->mg_len / sizeof(PMOP**);
11598             Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
11599             ((PMOP**)mg->mg_ptr) [elements++] = pm;
11600             mg->mg_len = elements * sizeof(PMOP**);
11601             PmopSTASH_set(pm,PL_curstash);
11602         }
11603     }
11604 #ifdef PERL_MAD
11605     modstart = s;
11606 #endif
11607     while (*s && strchr(valid_flags, *s))
11608         pm->op_pmflags = S_pmflag(pm->op_pmflags, *s++);
11609 #ifdef PERL_MAD
11610     if (PL_madskills && modstart != s) {
11611         SV* tmptoken = newSVpvn(modstart, s - modstart);
11612         append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
11613     }
11614 #endif
11615     /* issue a warning if /c is specified,but /g is not */
11616     if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
11617     {
11618         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
11619                        "Use of /c modifier is meaningless without /g" );
11620     }
11621
11622     PL_lex_op = (OP*)pm;
11623     pl_yylval.ival = OP_MATCH;
11624     return s;
11625 }
11626
11627 STATIC char *
11628 S_scan_subst(pTHX_ char *start)
11629 {
11630     dVAR;
11631     register char *s;
11632     register PMOP *pm;
11633     I32 first_start;
11634     I32 es = 0;
11635 #ifdef PERL_MAD
11636     char *modstart;
11637 #endif
11638
11639     PERL_ARGS_ASSERT_SCAN_SUBST;
11640
11641     pl_yylval.ival = OP_NULL;
11642
11643     s = scan_str(start,!!PL_madskills,FALSE);
11644
11645     if (!s)
11646         Perl_croak(aTHX_ "Substitution pattern not terminated");
11647
11648     if (s[-1] == PL_multi_open)
11649         s--;
11650 #ifdef PERL_MAD
11651     if (PL_madskills) {
11652         CURMAD('q', PL_thisopen);
11653         CURMAD('_', PL_thiswhite);
11654         CURMAD('E', PL_thisstuff);
11655         CURMAD('Q', PL_thisclose);
11656         PL_realtokenstart = s - SvPVX(PL_linestr);
11657     }
11658 #endif
11659
11660     first_start = PL_multi_start;
11661     s = scan_str(s,!!PL_madskills,FALSE);
11662     if (!s) {
11663         if (PL_lex_stuff) {
11664             SvREFCNT_dec(PL_lex_stuff);
11665             PL_lex_stuff = NULL;
11666         }
11667         Perl_croak(aTHX_ "Substitution replacement not terminated");
11668     }
11669     PL_multi_start = first_start;       /* so whole substitution is taken together */
11670
11671     pm = (PMOP*)newPMOP(OP_SUBST, 0);
11672
11673 #ifdef PERL_MAD
11674     if (PL_madskills) {
11675         CURMAD('z', PL_thisopen);
11676         CURMAD('R', PL_thisstuff);
11677         CURMAD('Z', PL_thisclose);
11678     }
11679     modstart = s;
11680 #endif
11681
11682     while (*s) {
11683         if (*s == EXEC_PAT_MOD) {
11684             s++;
11685             es++;
11686         }
11687         else if (strchr(S_PAT_MODS, *s))
11688             pm->op_pmflags = S_pmflag(pm->op_pmflags, *s++);
11689         else
11690             break;
11691     }
11692
11693 #ifdef PERL_MAD
11694     if (PL_madskills) {
11695         if (modstart != s)
11696             curmad('m', newSVpvn(modstart, s - modstart));
11697         append_madprops(PL_thismad, (OP*)pm, 0);
11698         PL_thismad = 0;
11699     }
11700 #endif
11701     if ((pm->op_pmflags & PMf_CONTINUE)) {
11702         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
11703     }
11704
11705     if (es) {
11706         SV * const repl = newSVpvs("");
11707
11708         PL_sublex_info.super_bufptr = s;
11709         PL_sublex_info.super_bufend = PL_bufend;
11710         PL_multi_end = 0;
11711         pm->op_pmflags |= PMf_EVAL;
11712         while (es-- > 0) {
11713             if (es)
11714                 sv_catpvs(repl, "eval ");
11715             else
11716                 sv_catpvs(repl, "do ");
11717         }
11718         sv_catpvs(repl, "{");
11719         sv_catsv(repl, PL_lex_repl);
11720         if (strchr(SvPVX(PL_lex_repl), '#'))
11721             sv_catpvs(repl, "\n");
11722         sv_catpvs(repl, "}");
11723         SvEVALED_on(repl);
11724         SvREFCNT_dec(PL_lex_repl);
11725         PL_lex_repl = repl;
11726     }
11727
11728     PL_lex_op = (OP*)pm;
11729     pl_yylval.ival = OP_SUBST;
11730     return s;
11731 }
11732
11733 STATIC char *
11734 S_scan_trans(pTHX_ char *start)
11735 {
11736     dVAR;
11737     register char* s;
11738     OP *o;
11739     short *tbl;
11740     U8 squash;
11741     U8 del;
11742     U8 complement;
11743 #ifdef PERL_MAD
11744     char *modstart;
11745 #endif
11746
11747     PERL_ARGS_ASSERT_SCAN_TRANS;
11748
11749     pl_yylval.ival = OP_NULL;
11750
11751     s = scan_str(start,!!PL_madskills,FALSE);
11752     if (!s)
11753         Perl_croak(aTHX_ "Transliteration pattern not terminated");
11754
11755     if (s[-1] == PL_multi_open)
11756         s--;
11757 #ifdef PERL_MAD
11758     if (PL_madskills) {
11759         CURMAD('q', PL_thisopen);
11760         CURMAD('_', PL_thiswhite);
11761         CURMAD('E', PL_thisstuff);
11762         CURMAD('Q', PL_thisclose);
11763         PL_realtokenstart = s - SvPVX(PL_linestr);
11764     }
11765 #endif
11766
11767     s = scan_str(s,!!PL_madskills,FALSE);
11768     if (!s) {
11769         if (PL_lex_stuff) {
11770             SvREFCNT_dec(PL_lex_stuff);
11771             PL_lex_stuff = NULL;
11772         }
11773         Perl_croak(aTHX_ "Transliteration replacement not terminated");
11774     }
11775     if (PL_madskills) {
11776         CURMAD('z', PL_thisopen);
11777         CURMAD('R', PL_thisstuff);
11778         CURMAD('Z', PL_thisclose);
11779     }
11780
11781     complement = del = squash = 0;
11782 #ifdef PERL_MAD
11783     modstart = s;
11784 #endif
11785     while (1) {
11786         switch (*s) {
11787         case 'c':
11788             complement = OPpTRANS_COMPLEMENT;
11789             break;
11790         case 'd':
11791             del = OPpTRANS_DELETE;
11792             break;
11793         case 's':
11794             squash = OPpTRANS_SQUASH;
11795             break;
11796         default:
11797             goto no_more;
11798         }
11799         s++;
11800     }
11801   no_more:
11802
11803     tbl = (short *)PerlMemShared_calloc(complement&&!del?258:256, sizeof(short));
11804     o = newPVOP(OP_TRANS, 0, (char*)tbl);
11805     o->op_private &= ~OPpTRANS_ALL;
11806     o->op_private |= del|squash|complement|
11807       (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
11808       (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF   : 0);
11809
11810     PL_lex_op = o;
11811     pl_yylval.ival = OP_TRANS;
11812
11813 #ifdef PERL_MAD
11814     if (PL_madskills) {
11815         if (modstart != s)
11816             curmad('m', newSVpvn(modstart, s - modstart));
11817         append_madprops(PL_thismad, o, 0);
11818         PL_thismad = 0;
11819     }
11820 #endif
11821
11822     return s;
11823 }
11824
11825 STATIC char *
11826 S_scan_heredoc(pTHX_ register char *s)
11827 {
11828     dVAR;
11829     SV *herewas;
11830     I32 op_type = OP_SCALAR;
11831     I32 len;
11832     SV *tmpstr;
11833     char term;
11834     const char *found_newline;
11835     register char *d;
11836     register char *e;
11837     char *peek;
11838     const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
11839 #ifdef PERL_MAD
11840     I32 stuffstart = s - SvPVX(PL_linestr);
11841     char *tstart;
11842
11843     PL_realtokenstart = -1;
11844 #endif
11845
11846     PERL_ARGS_ASSERT_SCAN_HEREDOC;
11847
11848     s += 2;
11849     d = PL_tokenbuf;
11850     e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
11851     if (!outer)
11852         *d++ = '\n';
11853     peek = s;
11854     while (SPACE_OR_TAB(*peek))
11855         peek++;
11856     if (*peek == '`' || *peek == '\'' || *peek =='"') {
11857         s = peek;
11858         term = *s++;
11859         s = delimcpy(d, e, s, PL_bufend, term, &len);
11860         d += len;
11861         if (s < PL_bufend)
11862             s++;
11863     }
11864     else {
11865         if (*s == '\\')
11866             s++, term = '\'';
11867         else
11868             term = '"';
11869         if (!isALNUM_lazy_if(s,UTF))
11870             deprecate("bare << to mean <<\"\"");
11871         for (; isALNUM_lazy_if(s,UTF); s++) {
11872             if (d < e)
11873                 *d++ = *s;
11874         }
11875     }
11876     if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
11877         Perl_croak(aTHX_ "Delimiter for here document is too long");
11878     *d++ = '\n';
11879     *d = '\0';
11880     len = d - PL_tokenbuf;
11881
11882 #ifdef PERL_MAD
11883     if (PL_madskills) {
11884         tstart = PL_tokenbuf + !outer;
11885         PL_thisclose = newSVpvn(tstart, len - !outer);
11886         tstart = SvPVX(PL_linestr) + stuffstart;
11887         PL_thisopen = newSVpvn(tstart, s - tstart);
11888         stuffstart = s - SvPVX(PL_linestr);
11889     }
11890 #endif
11891 #ifndef PERL_STRICT_CR
11892     d = strchr(s, '\r');
11893     if (d) {
11894         char * const olds = s;
11895         s = d;
11896         while (s < PL_bufend) {
11897             if (*s == '\r') {
11898                 *d++ = '\n';
11899                 if (*++s == '\n')
11900                     s++;
11901             }
11902             else if (*s == '\n' && s[1] == '\r') {      /* \015\013 on a mac? */
11903                 *d++ = *s++;
11904                 s++;
11905             }
11906             else
11907                 *d++ = *s++;
11908         }
11909         *d = '\0';
11910         PL_bufend = d;
11911         SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
11912         s = olds;
11913     }
11914 #endif
11915 #ifdef PERL_MAD
11916     found_newline = 0;
11917 #endif
11918     if ( outer || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s)) ) {
11919         herewas = newSVpvn(s,PL_bufend-s);
11920     }
11921     else {
11922 #ifdef PERL_MAD
11923         herewas = newSVpvn(s-1,found_newline-s+1);
11924 #else
11925         s--;
11926         herewas = newSVpvn(s,found_newline-s);
11927 #endif
11928     }
11929 #ifdef PERL_MAD
11930     if (PL_madskills) {
11931         tstart = SvPVX(PL_linestr) + stuffstart;
11932         if (PL_thisstuff)
11933             sv_catpvn(PL_thisstuff, tstart, s - tstart);
11934         else
11935             PL_thisstuff = newSVpvn(tstart, s - tstart);
11936     }
11937 #endif
11938     s += SvCUR(herewas);
11939
11940 #ifdef PERL_MAD
11941     stuffstart = s - SvPVX(PL_linestr);
11942
11943     if (found_newline)
11944         s--;
11945 #endif
11946
11947     tmpstr = newSV_type(SVt_PVIV);
11948     SvGROW(tmpstr, 80);
11949     if (term == '\'') {
11950         op_type = OP_CONST;
11951         SvIV_set(tmpstr, -1);
11952     }
11953     else if (term == '`') {
11954         op_type = OP_BACKTICK;
11955         SvIV_set(tmpstr, '\\');
11956     }
11957
11958     CLINE;
11959     PL_multi_start = CopLINE(PL_curcop);
11960     PL_multi_open = PL_multi_close = '<';
11961     term = *PL_tokenbuf;
11962     if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
11963         char * const bufptr = PL_sublex_info.super_bufptr;
11964         char * const bufend = PL_sublex_info.super_bufend;
11965         char * const olds = s - SvCUR(herewas);
11966         s = strchr(bufptr, '\n');
11967         if (!s)
11968             s = bufend;
11969         d = s;
11970         while (s < bufend &&
11971           (*s != term || memNE(s,PL_tokenbuf,len)) ) {
11972             if (*s++ == '\n')
11973                 CopLINE_inc(PL_curcop);
11974         }
11975         if (s >= bufend) {
11976             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11977             missingterm(PL_tokenbuf);
11978         }
11979         sv_setpvn(herewas,bufptr,d-bufptr+1);
11980         sv_setpvn(tmpstr,d+1,s-d);
11981         s += len - 1;
11982         sv_catpvn(herewas,s,bufend-s);
11983         Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
11984
11985         s = olds;
11986         goto retval;
11987     }
11988     else if (!outer) {
11989         d = s;
11990         while (s < PL_bufend &&
11991           (*s != term || memNE(s,PL_tokenbuf,len)) ) {
11992             if (*s++ == '\n')
11993                 CopLINE_inc(PL_curcop);
11994         }
11995         if (s >= PL_bufend) {
11996             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11997             missingterm(PL_tokenbuf);
11998         }
11999         sv_setpvn(tmpstr,d+1,s-d);
12000 #ifdef PERL_MAD
12001         if (PL_madskills) {
12002             if (PL_thisstuff)
12003                 sv_catpvn(PL_thisstuff, d + 1, s - d);
12004             else
12005                 PL_thisstuff = newSVpvn(d + 1, s - d);
12006             stuffstart = s - SvPVX(PL_linestr);
12007         }
12008 #endif
12009         s += len - 1;
12010         CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
12011
12012         sv_catpvn(herewas,s,PL_bufend-s);
12013         sv_setsv(PL_linestr,herewas);
12014         PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
12015         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
12016         PL_last_lop = PL_last_uni = NULL;
12017     }
12018     else
12019         sv_setpvs(tmpstr,"");   /* avoid "uninitialized" warning */
12020     while (s >= PL_bufend) {    /* multiple line string? */
12021 #ifdef PERL_MAD
12022         if (PL_madskills) {
12023             tstart = SvPVX(PL_linestr) + stuffstart;
12024             if (PL_thisstuff)
12025                 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
12026             else
12027                 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
12028         }
12029 #endif
12030         PL_bufptr = s;
12031         if (!outer || !lex_next_chunk(0)) {
12032             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
12033             missingterm(PL_tokenbuf);
12034         }
12035         s = PL_bufptr;
12036 #ifdef PERL_MAD
12037         stuffstart = s - SvPVX(PL_linestr);
12038 #endif
12039         CopLINE_inc(PL_curcop);
12040         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
12041         PL_last_lop = PL_last_uni = NULL;
12042 #ifndef PERL_STRICT_CR
12043         if (PL_bufend - PL_linestart >= 2) {
12044             if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
12045                 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
12046             {
12047                 PL_bufend[-2] = '\n';
12048                 PL_bufend--;
12049                 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
12050             }
12051             else if (PL_bufend[-1] == '\r')
12052                 PL_bufend[-1] = '\n';
12053         }
12054         else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
12055             PL_bufend[-1] = '\n';
12056 #endif
12057         if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
12058             update_debugger_info(PL_linestr, NULL, 0);
12059         if (*s == term && memEQ(s,PL_tokenbuf,len)) {
12060             STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
12061             *(SvPVX(PL_linestr) + off ) = ' ';
12062             sv_catsv(PL_linestr,herewas);
12063             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
12064             s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
12065         }
12066         else {
12067             s = PL_bufend;
12068             sv_catsv(tmpstr,PL_linestr);
12069         }
12070     }
12071     s++;
12072 retval:
12073     PL_multi_end = CopLINE(PL_curcop);
12074     if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
12075         SvPV_shrink_to_cur(tmpstr);
12076     }
12077     SvREFCNT_dec(herewas);
12078     if (!IN_BYTES) {
12079         if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
12080             SvUTF8_on(tmpstr);
12081         else if (PL_encoding)
12082             sv_recode_to_utf8(tmpstr, PL_encoding);
12083     }
12084     PL_lex_stuff = tmpstr;
12085     pl_yylval.ival = op_type;
12086     return s;
12087 }
12088
12089 /* scan_inputsymbol
12090    takes: current position in input buffer
12091    returns: new position in input buffer
12092    side-effects: pl_yylval and lex_op are set.
12093
12094    This code handles:
12095
12096    <>           read from ARGV
12097    <FH>         read from filehandle
12098    <pkg::FH>    read from package qualified filehandle
12099    <pkg'FH>     read from package qualified filehandle
12100    <$fh>        read from filehandle in $fh
12101    <*.h>        filename glob
12102
12103 */
12104
12105 STATIC char *
12106 S_scan_inputsymbol(pTHX_ char *start)
12107 {
12108     dVAR;
12109     register char *s = start;           /* current position in buffer */
12110     char *end;
12111     I32 len;
12112     char *d = PL_tokenbuf;                                      /* start of temp holding space */
12113     const char * const e = PL_tokenbuf + sizeof PL_tokenbuf;    /* end of temp holding space */
12114
12115     PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
12116
12117     end = strchr(s, '\n');
12118     if (!end)
12119         end = PL_bufend;
12120     s = delimcpy(d, e, s + 1, end, '>', &len);  /* extract until > */
12121
12122     /* die if we didn't have space for the contents of the <>,
12123        or if it didn't end, or if we see a newline
12124     */
12125
12126     if (len >= (I32)sizeof PL_tokenbuf)
12127         Perl_croak(aTHX_ "Excessively long <> operator");
12128     if (s >= end)
12129         Perl_croak(aTHX_ "Unterminated <> operator");
12130
12131     s++;
12132
12133     /* check for <$fh>
12134        Remember, only scalar variables are interpreted as filehandles by
12135        this code.  Anything more complex (e.g., <$fh{$num}>) will be
12136        treated as a glob() call.
12137        This code makes use of the fact that except for the $ at the front,
12138        a scalar variable and a filehandle look the same.
12139     */
12140     if (*d == '$' && d[1]) d++;
12141
12142     /* allow <Pkg'VALUE> or <Pkg::VALUE> */
12143     while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
12144         d++;
12145
12146     /* If we've tried to read what we allow filehandles to look like, and
12147        there's still text left, then it must be a glob() and not a getline.
12148        Use scan_str to pull out the stuff between the <> and treat it
12149        as nothing more than a string.
12150     */
12151
12152     if (d - PL_tokenbuf != len) {
12153         pl_yylval.ival = OP_GLOB;
12154         s = scan_str(start,!!PL_madskills,FALSE);
12155         if (!s)
12156            Perl_croak(aTHX_ "Glob not terminated");
12157         return s;
12158     }
12159     else {
12160         bool readline_overriden = FALSE;
12161         GV *gv_readline;
12162         GV **gvp;
12163         /* we're in a filehandle read situation */
12164         d = PL_tokenbuf;
12165
12166         /* turn <> into <ARGV> */
12167         if (!len)
12168             Copy("ARGV",d,5,char);
12169
12170         /* Check whether readline() is overriden */
12171         gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
12172         if ((gv_readline
12173                 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
12174                 ||
12175                 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
12176                  && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
12177                 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
12178             readline_overriden = TRUE;
12179
12180         /* if <$fh>, create the ops to turn the variable into a
12181            filehandle
12182         */
12183         if (*d == '$') {
12184             /* try to find it in the pad for this block, otherwise find
12185                add symbol table ops
12186             */
12187             const PADOFFSET tmp = pad_findmy(d, len, 0);
12188             if (tmp != NOT_IN_PAD) {
12189                 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
12190                     HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
12191                     HEK * const stashname = HvNAME_HEK(stash);
12192                     SV * const sym = sv_2mortal(newSVhek(stashname));
12193                     sv_catpvs(sym, "::");
12194                     sv_catpv(sym, d+1);
12195                     d = SvPVX(sym);
12196                     goto intro_sym;
12197                 }
12198                 else {
12199                     OP * const o = newOP(OP_PADSV, 0);
12200                     o->op_targ = tmp;
12201                     PL_lex_op = readline_overriden
12202                         ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12203                                 append_elem(OP_LIST, o,
12204                                     newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
12205                         : (OP*)newUNOP(OP_READLINE, 0, o);
12206                 }
12207             }
12208             else {
12209                 GV *gv;
12210                 ++d;
12211 intro_sym:
12212                 gv = gv_fetchpv(d,
12213                                 (PL_in_eval
12214                                  ? (GV_ADDMULTI | GV_ADDINEVAL)
12215                                  : GV_ADDMULTI),
12216                                 SVt_PV);
12217                 PL_lex_op = readline_overriden
12218                     ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12219                             append_elem(OP_LIST,
12220                                 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
12221                                 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
12222                     : (OP*)newUNOP(OP_READLINE, 0,
12223                             newUNOP(OP_RV2SV, 0,
12224                                 newGVOP(OP_GV, 0, gv)));
12225             }
12226             if (!readline_overriden)
12227                 PL_lex_op->op_flags |= OPf_SPECIAL;
12228             /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
12229             pl_yylval.ival = OP_NULL;
12230         }
12231
12232         /* If it's none of the above, it must be a literal filehandle
12233            (<Foo::BAR> or <FOO>) so build a simple readline OP */
12234         else {
12235             GV * const gv = gv_fetchpv(d, GV_ADD, SVt_PVIO);
12236             PL_lex_op = readline_overriden
12237                 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12238                         append_elem(OP_LIST,
12239                             newGVOP(OP_GV, 0, gv),
12240                             newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
12241                 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
12242             pl_yylval.ival = OP_NULL;
12243         }
12244     }
12245
12246     return s;
12247 }
12248
12249
12250 /* scan_str
12251    takes: start position in buffer
12252           keep_quoted preserve \ on the embedded delimiter(s)
12253           keep_delims preserve the delimiters around the string
12254    returns: position to continue reading from buffer
12255    side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
12256         updates the read buffer.
12257
12258    This subroutine pulls a string out of the input.  It is called for:
12259         q               single quotes           q(literal text)
12260         '               single quotes           'literal text'
12261         qq              double quotes           qq(interpolate $here please)
12262         "               double quotes           "interpolate $here please"
12263         qx              backticks               qx(/bin/ls -l)
12264         `               backticks               `/bin/ls -l`
12265         qw              quote words             @EXPORT_OK = qw( func() $spam )
12266         m//             regexp match            m/this/
12267         s///            regexp substitute       s/this/that/
12268         tr///           string transliterate    tr/this/that/
12269         y///            string transliterate    y/this/that/
12270         ($*@)           sub prototypes          sub foo ($)
12271         (stuff)         sub attr parameters     sub foo : attr(stuff)
12272         <>              readline or globs       <FOO>, <>, <$fh>, or <*.c>
12273
12274    In most of these cases (all but <>, patterns and transliterate)
12275    yylex() calls scan_str().  m// makes yylex() call scan_pat() which
12276    calls scan_str().  s/// makes yylex() call scan_subst() which calls
12277    scan_str().  tr/// and y/// make yylex() call scan_trans() which
12278    calls scan_str().
12279
12280    It skips whitespace before the string starts, and treats the first
12281    character as the delimiter.  If the delimiter is one of ([{< then
12282    the corresponding "close" character )]}> is used as the closing
12283    delimiter.  It allows quoting of delimiters, and if the string has
12284    balanced delimiters ([{<>}]) it allows nesting.
12285
12286    On success, the SV with the resulting string is put into lex_stuff or,
12287    if that is already non-NULL, into lex_repl. The second case occurs only
12288    when parsing the RHS of the special constructs s/// and tr/// (y///).
12289    For convenience, the terminating delimiter character is stuffed into
12290    SvIVX of the SV.
12291 */
12292
12293 STATIC char *
12294 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
12295 {
12296     dVAR;
12297     SV *sv;                             /* scalar value: string */
12298     const char *tmps;                   /* temp string, used for delimiter matching */
12299     register char *s = start;           /* current position in the buffer */
12300     register char term;                 /* terminating character */
12301     register char *to;                  /* current position in the sv's data */
12302     I32 brackets = 1;                   /* bracket nesting level */
12303     bool has_utf8 = FALSE;              /* is there any utf8 content? */
12304     I32 termcode;                       /* terminating char. code */
12305     U8 termstr[UTF8_MAXBYTES];          /* terminating string */
12306     STRLEN termlen;                     /* length of terminating string */
12307     int last_off = 0;                   /* last position for nesting bracket */
12308 #ifdef PERL_MAD
12309     int stuffstart;
12310     char *tstart;
12311 #endif
12312
12313     PERL_ARGS_ASSERT_SCAN_STR;
12314
12315     /* skip space before the delimiter */
12316     if (isSPACE(*s)) {
12317         s = PEEKSPACE(s);
12318     }
12319
12320 #ifdef PERL_MAD
12321     if (PL_realtokenstart >= 0) {
12322         stuffstart = PL_realtokenstart;
12323         PL_realtokenstart = -1;
12324     }
12325     else
12326         stuffstart = start - SvPVX(PL_linestr);
12327 #endif
12328     /* mark where we are, in case we need to report errors */
12329     CLINE;
12330
12331     /* after skipping whitespace, the next character is the terminator */
12332     term = *s;
12333     if (!UTF) {
12334         termcode = termstr[0] = term;
12335         termlen = 1;
12336     }
12337     else {
12338         termcode = utf8_to_uvchr((U8*)s, &termlen);
12339         Copy(s, termstr, termlen, U8);
12340         if (!UTF8_IS_INVARIANT(term))
12341             has_utf8 = TRUE;
12342     }
12343
12344     /* mark where we are */
12345     PL_multi_start = CopLINE(PL_curcop);
12346     PL_multi_open = term;
12347
12348     /* find corresponding closing delimiter */
12349     if (term && (tmps = strchr("([{< )]}> )]}>",term)))
12350         termcode = termstr[0] = term = tmps[5];
12351
12352     PL_multi_close = term;
12353
12354     /* create a new SV to hold the contents.  79 is the SV's initial length.
12355        What a random number. */
12356     sv = newSV_type(SVt_PVIV);
12357     SvGROW(sv, 80);
12358     SvIV_set(sv, termcode);
12359     (void)SvPOK_only(sv);               /* validate pointer */
12360
12361     /* move past delimiter and try to read a complete string */
12362     if (keep_delims)
12363         sv_catpvn(sv, s, termlen);
12364     s += termlen;
12365 #ifdef PERL_MAD
12366     tstart = SvPVX(PL_linestr) + stuffstart;
12367     if (!PL_thisopen && !keep_delims) {
12368         PL_thisopen = newSVpvn(tstart, s - tstart);
12369         stuffstart = s - SvPVX(PL_linestr);
12370     }
12371 #endif
12372     for (;;) {
12373         if (PL_encoding && !UTF) {
12374             bool cont = TRUE;
12375
12376             while (cont) {
12377                 int offset = s - SvPVX_const(PL_linestr);
12378                 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
12379                                            &offset, (char*)termstr, termlen);
12380                 const char * const ns = SvPVX_const(PL_linestr) + offset;
12381                 char * const svlast = SvEND(sv) - 1;
12382
12383                 for (; s < ns; s++) {
12384                     if (*s == '\n' && !PL_rsfp)
12385                         CopLINE_inc(PL_curcop);
12386                 }
12387                 if (!found)
12388                     goto read_more_line;
12389                 else {
12390                     /* handle quoted delimiters */
12391                     if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
12392                         const char *t;
12393                         for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
12394                             t--;
12395                         if ((svlast-1 - t) % 2) {
12396                             if (!keep_quoted) {
12397                                 *(svlast-1) = term;
12398                                 *svlast = '\0';
12399                                 SvCUR_set(sv, SvCUR(sv) - 1);
12400                             }
12401                             continue;
12402                         }
12403                     }
12404                     if (PL_multi_open == PL_multi_close) {
12405                         cont = FALSE;
12406                     }
12407                     else {
12408                         const char *t;
12409                         char *w;
12410                         for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
12411                             /* At here, all closes are "was quoted" one,
12412                                so we don't check PL_multi_close. */
12413                             if (*t == '\\') {
12414                                 if (!keep_quoted && *(t+1) == PL_multi_open)
12415                                     t++;
12416                                 else
12417                                     *w++ = *t++;
12418                             }
12419                             else if (*t == PL_multi_open)
12420                                 brackets++;
12421
12422                             *w = *t;
12423                         }
12424                         if (w < t) {
12425                             *w++ = term;
12426                             *w = '\0';
12427                             SvCUR_set(sv, w - SvPVX_const(sv));
12428                         }
12429                         last_off = w - SvPVX(sv);
12430                         if (--brackets <= 0)
12431                             cont = FALSE;
12432                     }
12433                 }
12434             }
12435             if (!keep_delims) {
12436                 SvCUR_set(sv, SvCUR(sv) - 1);
12437                 *SvEND(sv) = '\0';
12438             }
12439             break;
12440         }
12441
12442         /* extend sv if need be */
12443         SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
12444         /* set 'to' to the next character in the sv's string */
12445         to = SvPVX(sv)+SvCUR(sv);
12446
12447         /* if open delimiter is the close delimiter read unbridle */
12448         if (PL_multi_open == PL_multi_close) {
12449             for (; s < PL_bufend; s++,to++) {
12450                 /* embedded newlines increment the current line number */
12451                 if (*s == '\n' && !PL_rsfp)
12452                     CopLINE_inc(PL_curcop);
12453                 /* handle quoted delimiters */
12454                 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
12455                     if (!keep_quoted && s[1] == term)
12456                         s++;
12457                 /* any other quotes are simply copied straight through */
12458                     else
12459                         *to++ = *s++;
12460                 }
12461                 /* terminate when run out of buffer (the for() condition), or
12462                    have found the terminator */
12463                 else if (*s == term) {
12464                     if (termlen == 1)
12465                         break;
12466                     if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
12467                         break;
12468                 }
12469                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
12470                     has_utf8 = TRUE;
12471                 *to = *s;
12472             }
12473         }
12474
12475         /* if the terminator isn't the same as the start character (e.g.,
12476            matched brackets), we have to allow more in the quoting, and
12477            be prepared for nested brackets.
12478         */
12479         else {
12480             /* read until we run out of string, or we find the terminator */
12481             for (; s < PL_bufend; s++,to++) {
12482                 /* embedded newlines increment the line count */
12483                 if (*s == '\n' && !PL_rsfp)
12484                     CopLINE_inc(PL_curcop);
12485                 /* backslashes can escape the open or closing characters */
12486                 if (*s == '\\' && s+1 < PL_bufend) {
12487                     if (!keep_quoted &&
12488                         ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
12489                         s++;
12490                     else
12491                         *to++ = *s++;
12492                 }
12493                 /* allow nested opens and closes */
12494                 else if (*s == PL_multi_close && --brackets <= 0)
12495                     break;
12496                 else if (*s == PL_multi_open)
12497                     brackets++;
12498                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
12499                     has_utf8 = TRUE;
12500                 *to = *s;
12501             }
12502         }
12503         /* terminate the copied string and update the sv's end-of-string */
12504         *to = '\0';
12505         SvCUR_set(sv, to - SvPVX_const(sv));
12506
12507         /*
12508          * this next chunk reads more into the buffer if we're not done yet
12509          */
12510
12511         if (s < PL_bufend)
12512             break;              /* handle case where we are done yet :-) */
12513
12514 #ifndef PERL_STRICT_CR
12515         if (to - SvPVX_const(sv) >= 2) {
12516             if ((to[-2] == '\r' && to[-1] == '\n') ||
12517                 (to[-2] == '\n' && to[-1] == '\r'))
12518             {
12519                 to[-2] = '\n';
12520                 to--;
12521                 SvCUR_set(sv, to - SvPVX_const(sv));
12522             }
12523             else if (to[-1] == '\r')
12524                 to[-1] = '\n';
12525         }
12526         else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
12527             to[-1] = '\n';
12528 #endif
12529
12530      read_more_line:
12531         /* if we're out of file, or a read fails, bail and reset the current
12532            line marker so we can report where the unterminated string began
12533         */
12534 #ifdef PERL_MAD
12535         if (PL_madskills) {
12536             char * const tstart = SvPVX(PL_linestr) + stuffstart;
12537             if (PL_thisstuff)
12538                 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
12539             else
12540                 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
12541         }
12542 #endif
12543         CopLINE_inc(PL_curcop);
12544         PL_bufptr = PL_bufend;
12545         if (!lex_next_chunk(0)) {
12546             sv_free(sv);
12547             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
12548             return NULL;
12549         }
12550         s = PL_bufptr;
12551 #ifdef PERL_MAD
12552         stuffstart = 0;
12553 #endif
12554     }
12555
12556     /* at this point, we have successfully read the delimited string */
12557
12558     if (!PL_encoding || UTF) {
12559 #ifdef PERL_MAD
12560         if (PL_madskills) {
12561             char * const tstart = SvPVX(PL_linestr) + stuffstart;
12562             const int len = s - tstart;
12563             if (PL_thisstuff)
12564                 sv_catpvn(PL_thisstuff, tstart, len);
12565             else
12566                 PL_thisstuff = newSVpvn(tstart, len);
12567             if (!PL_thisclose && !keep_delims)
12568                 PL_thisclose = newSVpvn(s,termlen);
12569         }
12570 #endif
12571
12572         if (keep_delims)
12573             sv_catpvn(sv, s, termlen);
12574         s += termlen;
12575     }
12576 #ifdef PERL_MAD
12577     else {
12578         if (PL_madskills) {
12579             char * const tstart = SvPVX(PL_linestr) + stuffstart;
12580             const int len = s - tstart - termlen;
12581             if (PL_thisstuff)
12582                 sv_catpvn(PL_thisstuff, tstart, len);
12583             else
12584                 PL_thisstuff = newSVpvn(tstart, len);
12585             if (!PL_thisclose && !keep_delims)
12586                 PL_thisclose = newSVpvn(s - termlen,termlen);
12587         }
12588     }
12589 #endif
12590     if (has_utf8 || PL_encoding)
12591         SvUTF8_on(sv);
12592
12593     PL_multi_end = CopLINE(PL_curcop);
12594
12595     /* if we allocated too much space, give some back */
12596     if (SvCUR(sv) + 5 < SvLEN(sv)) {
12597         SvLEN_set(sv, SvCUR(sv) + 1);
12598         SvPV_renew(sv, SvLEN(sv));
12599     }
12600
12601     /* decide whether this is the first or second quoted string we've read
12602        for this op
12603     */
12604
12605     if (PL_lex_stuff)
12606         PL_lex_repl = sv;
12607     else
12608         PL_lex_stuff = sv;
12609     return s;
12610 }
12611
12612 /*
12613   scan_num
12614   takes: pointer to position in buffer
12615   returns: pointer to new position in buffer
12616   side-effects: builds ops for the constant in pl_yylval.op
12617
12618   Read a number in any of the formats that Perl accepts:
12619
12620   \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*)      12 12.34 12.
12621   \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*)                     .34
12622   0b[01](_?[01])*
12623   0[0-7](_?[0-7])*
12624   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
12625
12626   Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
12627   thing it reads.
12628
12629   If it reads a number without a decimal point or an exponent, it will
12630   try converting the number to an integer and see if it can do so
12631   without loss of precision.
12632 */
12633
12634 char *
12635 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
12636 {
12637     dVAR;
12638     register const char *s = start;     /* current position in buffer */
12639     register char *d;                   /* destination in temp buffer */
12640     register char *e;                   /* end of temp buffer */
12641     NV nv;                              /* number read, as a double */
12642     SV *sv = NULL;                      /* place to put the converted number */
12643     bool floatit;                       /* boolean: int or float? */
12644     const char *lastub = NULL;          /* position of last underbar */
12645     static char const number_too_long[] = "Number too long";
12646
12647     PERL_ARGS_ASSERT_SCAN_NUM;
12648
12649     /* We use the first character to decide what type of number this is */
12650
12651     switch (*s) {
12652     default:
12653       Perl_croak(aTHX_ "panic: scan_num");
12654
12655     /* if it starts with a 0, it could be an octal number, a decimal in
12656        0.13 disguise, or a hexadecimal number, or a binary number. */
12657     case '0':
12658         {
12659           /* variables:
12660              u          holds the "number so far"
12661              shift      the power of 2 of the base
12662                         (hex == 4, octal == 3, binary == 1)
12663              overflowed was the number more than we can hold?
12664
12665              Shift is used when we add a digit.  It also serves as an "are
12666              we in octal/hex/binary?" indicator to disallow hex characters
12667              when in octal mode.
12668            */
12669             NV n = 0.0;
12670             UV u = 0;
12671             I32 shift;
12672             bool overflowed = FALSE;
12673             bool just_zero  = TRUE;     /* just plain 0 or binary number? */
12674             static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
12675             static const char* const bases[5] =
12676               { "", "binary", "", "octal", "hexadecimal" };
12677             static const char* const Bases[5] =
12678               { "", "Binary", "", "Octal", "Hexadecimal" };
12679             static const char* const maxima[5] =
12680               { "",
12681                 "0b11111111111111111111111111111111",
12682                 "",
12683                 "037777777777",
12684                 "0xffffffff" };
12685             const char *base, *Base, *max;
12686
12687             /* check for hex */
12688             if (s[1] == 'x') {
12689                 shift = 4;
12690                 s += 2;
12691                 just_zero = FALSE;
12692             } else if (s[1] == 'b') {
12693                 shift = 1;
12694                 s += 2;
12695                 just_zero = FALSE;
12696             }
12697             /* check for a decimal in disguise */
12698             else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
12699                 goto decimal;
12700             /* so it must be octal */
12701             else {
12702                 shift = 3;
12703                 s++;
12704             }
12705
12706             if (*s == '_') {
12707                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12708                                "Misplaced _ in number");
12709                lastub = s++;
12710             }
12711
12712             base = bases[shift];
12713             Base = Bases[shift];
12714             max  = maxima[shift];
12715
12716             /* read the rest of the number */
12717             for (;;) {
12718                 /* x is used in the overflow test,
12719                    b is the digit we're adding on. */
12720                 UV x, b;
12721
12722                 switch (*s) {
12723
12724                 /* if we don't mention it, we're done */
12725                 default:
12726                     goto out;
12727
12728                 /* _ are ignored -- but warned about if consecutive */
12729                 case '_':
12730                     if (lastub && s == lastub + 1)
12731                         Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12732                                        "Misplaced _ in number");
12733                     lastub = s++;
12734                     break;
12735
12736                 /* 8 and 9 are not octal */
12737                 case '8': case '9':
12738                     if (shift == 3)
12739                         yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
12740                     /* FALL THROUGH */
12741
12742                 /* octal digits */
12743                 case '2': case '3': case '4':
12744                 case '5': case '6': case '7':
12745                     if (shift == 1)
12746                         yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
12747                     /* FALL THROUGH */
12748
12749                 case '0': case '1':
12750                     b = *s++ & 15;              /* ASCII digit -> value of digit */
12751                     goto digit;
12752
12753                 /* hex digits */
12754                 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
12755                 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
12756                     /* make sure they said 0x */
12757                     if (shift != 4)
12758                         goto out;
12759                     b = (*s++ & 7) + 9;
12760
12761                     /* Prepare to put the digit we have onto the end
12762                        of the number so far.  We check for overflows.
12763                     */
12764
12765                   digit:
12766                     just_zero = FALSE;
12767                     if (!overflowed) {
12768                         x = u << shift; /* make room for the digit */
12769
12770                         if ((x >> shift) != u
12771                             && !(PL_hints & HINT_NEW_BINARY)) {
12772                             overflowed = TRUE;
12773                             n = (NV) u;
12774                             Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
12775                                              "Integer overflow in %s number",
12776                                              base);
12777                         } else
12778                             u = x | b;          /* add the digit to the end */
12779                     }
12780                     if (overflowed) {
12781                         n *= nvshift[shift];
12782                         /* If an NV has not enough bits in its
12783                          * mantissa to represent an UV this summing of
12784                          * small low-order numbers is a waste of time
12785                          * (because the NV cannot preserve the
12786                          * low-order bits anyway): we could just
12787                          * remember when did we overflow and in the
12788                          * end just multiply n by the right
12789                          * amount. */
12790                         n += (NV) b;
12791                     }
12792                     break;
12793                 }
12794             }
12795
12796           /* if we get here, we had success: make a scalar value from
12797              the number.
12798           */
12799           out:
12800
12801             /* final misplaced underbar check */
12802             if (s[-1] == '_') {
12803                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
12804             }
12805
12806             sv = newSV(0);
12807             if (overflowed) {
12808                 if (n > 4294967295.0)
12809                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
12810                                    "%s number > %s non-portable",
12811                                    Base, max);
12812                 sv_setnv(sv, n);
12813             }
12814             else {
12815 #if UVSIZE > 4
12816                 if (u > 0xffffffff)
12817                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
12818                                    "%s number > %s non-portable",
12819                                    Base, max);
12820 #endif
12821                 sv_setuv(sv, u);
12822             }
12823             if (just_zero && (PL_hints & HINT_NEW_INTEGER))
12824                 sv = new_constant(start, s - start, "integer",
12825                                   sv, NULL, NULL, 0);
12826             else if (PL_hints & HINT_NEW_BINARY)
12827                 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
12828         }
12829         break;
12830
12831     /*
12832       handle decimal numbers.
12833       we're also sent here when we read a 0 as the first digit
12834     */
12835     case '1': case '2': case '3': case '4': case '5':
12836     case '6': case '7': case '8': case '9': case '.':
12837       decimal:
12838         d = PL_tokenbuf;
12839         e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
12840         floatit = FALSE;
12841
12842         /* read next group of digits and _ and copy into d */
12843         while (isDIGIT(*s) || *s == '_') {
12844             /* skip underscores, checking for misplaced ones
12845                if -w is on
12846             */
12847             if (*s == '_') {
12848                 if (lastub && s == lastub + 1)
12849                     Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12850                                    "Misplaced _ in number");
12851                 lastub = s++;
12852             }
12853             else {
12854                 /* check for end of fixed-length buffer */
12855                 if (d >= e)
12856                     Perl_croak(aTHX_ number_too_long);
12857                 /* if we're ok, copy the character */
12858                 *d++ = *s++;
12859             }
12860         }
12861
12862         /* final misplaced underbar check */
12863         if (lastub && s == lastub + 1) {
12864             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
12865         }
12866
12867         /* read a decimal portion if there is one.  avoid
12868            3..5 being interpreted as the number 3. followed
12869            by .5
12870         */
12871         if (*s == '.' && s[1] != '.') {
12872             floatit = TRUE;
12873             *d++ = *s++;
12874
12875             if (*s == '_') {
12876                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12877                                "Misplaced _ in number");
12878                 lastub = s;
12879             }
12880
12881             /* copy, ignoring underbars, until we run out of digits.
12882             */
12883             for (; isDIGIT(*s) || *s == '_'; s++) {
12884                 /* fixed length buffer check */
12885                 if (d >= e)
12886                     Perl_croak(aTHX_ number_too_long);
12887                 if (*s == '_') {
12888                    if (lastub && s == lastub + 1)
12889                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12890                                       "Misplaced _ in number");
12891                    lastub = s;
12892                 }
12893                 else
12894                     *d++ = *s;
12895             }
12896             /* fractional part ending in underbar? */
12897             if (s[-1] == '_') {
12898                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12899                                "Misplaced _ in number");
12900             }
12901             if (*s == '.' && isDIGIT(s[1])) {
12902                 /* oops, it's really a v-string, but without the "v" */
12903                 s = start;
12904                 goto vstring;
12905             }
12906         }
12907
12908         /* read exponent part, if present */
12909         if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
12910             floatit = TRUE;
12911             s++;
12912
12913             /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
12914             *d++ = 'e';         /* At least some Mach atof()s don't grok 'E' */
12915
12916             /* stray preinitial _ */
12917             if (*s == '_') {
12918                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12919                                "Misplaced _ in number");
12920                 lastub = s++;
12921             }
12922
12923             /* allow positive or negative exponent */
12924             if (*s == '+' || *s == '-')
12925                 *d++ = *s++;
12926
12927             /* stray initial _ */
12928             if (*s == '_') {
12929                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12930                                "Misplaced _ in number");
12931                 lastub = s++;
12932             }
12933
12934             /* read digits of exponent */
12935             while (isDIGIT(*s) || *s == '_') {
12936                 if (isDIGIT(*s)) {
12937                     if (d >= e)
12938                         Perl_croak(aTHX_ number_too_long);
12939                     *d++ = *s++;
12940                 }
12941                 else {
12942                    if (((lastub && s == lastub + 1) ||
12943                         (!isDIGIT(s[1]) && s[1] != '_')))
12944                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12945                                       "Misplaced _ in number");
12946                    lastub = s++;
12947                 }
12948             }
12949         }
12950
12951
12952         /* make an sv from the string */
12953         sv = newSV(0);
12954
12955         /*
12956            We try to do an integer conversion first if no characters
12957            indicating "float" have been found.
12958          */
12959
12960         if (!floatit) {
12961             UV uv;
12962             const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
12963
12964             if (flags == IS_NUMBER_IN_UV) {
12965               if (uv <= IV_MAX)
12966                 sv_setiv(sv, uv); /* Prefer IVs over UVs. */
12967               else
12968                 sv_setuv(sv, uv);
12969             } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
12970               if (uv <= (UV) IV_MIN)
12971                 sv_setiv(sv, -(IV)uv);
12972               else
12973                 floatit = TRUE;
12974             } else
12975               floatit = TRUE;
12976         }
12977         if (floatit) {
12978             /* terminate the string */
12979             *d = '\0';
12980             nv = Atof(PL_tokenbuf);
12981             sv_setnv(sv, nv);
12982         }
12983
12984         if ( floatit
12985              ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
12986             const char *const key = floatit ? "float" : "integer";
12987             const STRLEN keylen = floatit ? 5 : 7;
12988             sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
12989                                 key, keylen, sv, NULL, NULL, 0);
12990         }
12991         break;
12992
12993     /* if it starts with a v, it could be a v-string */
12994     case 'v':
12995 vstring:
12996                 sv = newSV(5); /* preallocate storage space */
12997                 s = scan_vstring(s, PL_bufend, sv);
12998         break;
12999     }
13000
13001     /* make the op for the constant and return */
13002
13003     if (sv)
13004         lvalp->opval = newSVOP(OP_CONST, 0, sv);
13005     else
13006         lvalp->opval = NULL;
13007
13008     return (char *)s;
13009 }
13010
13011 STATIC char *
13012 S_scan_formline(pTHX_ register char *s)
13013 {
13014     dVAR;
13015     register char *eol;
13016     register char *t;
13017     SV * const stuff = newSVpvs("");
13018     bool needargs = FALSE;
13019     bool eofmt = FALSE;
13020 #ifdef PERL_MAD
13021     char *tokenstart = s;
13022     SV* savewhite = NULL;
13023
13024     if (PL_madskills) {
13025         savewhite = PL_thiswhite;
13026         PL_thiswhite = 0;
13027     }
13028 #endif
13029
13030     PERL_ARGS_ASSERT_SCAN_FORMLINE;
13031
13032     while (!needargs) {
13033         if (*s == '.') {
13034             t = s+1;
13035 #ifdef PERL_STRICT_CR
13036             while (SPACE_OR_TAB(*t))
13037                 t++;
13038 #else
13039             while (SPACE_OR_TAB(*t) || *t == '\r')
13040                 t++;
13041 #endif
13042             if (*t == '\n' || t == PL_bufend) {
13043                 eofmt = TRUE;
13044                 break;
13045             }
13046         }
13047         if (PL_in_eval && !PL_rsfp) {
13048             eol = (char *) memchr(s,'\n',PL_bufend-s);
13049             if (!eol++)
13050                 eol = PL_bufend;
13051         }
13052         else
13053             eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
13054         if (*s != '#') {
13055             for (t = s; t < eol; t++) {
13056                 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
13057                     needargs = FALSE;
13058                     goto enough;        /* ~~ must be first line in formline */
13059                 }
13060                 if (*t == '@' || *t == '^')
13061                     needargs = TRUE;
13062             }
13063             if (eol > s) {
13064                 sv_catpvn(stuff, s, eol-s);
13065 #ifndef PERL_STRICT_CR
13066                 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
13067                     char *end = SvPVX(stuff) + SvCUR(stuff);
13068                     end[-2] = '\n';
13069                     end[-1] = '\0';
13070                     SvCUR_set(stuff, SvCUR(stuff) - 1);
13071                 }
13072 #endif
13073             }
13074             else
13075               break;
13076         }
13077         s = (char*)eol;
13078         if (PL_rsfp) {
13079             bool got_some;
13080 #ifdef PERL_MAD
13081             if (PL_madskills) {
13082                 if (PL_thistoken)
13083                     sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
13084                 else
13085                     PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
13086             }
13087 #endif
13088             PL_bufptr = PL_bufend;
13089             CopLINE_inc(PL_curcop);
13090             got_some = lex_next_chunk(0);
13091             CopLINE_dec(PL_curcop);
13092             s = PL_bufptr;
13093 #ifdef PERL_MAD
13094             tokenstart = PL_bufptr;
13095 #endif
13096             if (!got_some)
13097                 break;
13098         }
13099         incline(s);
13100     }
13101   enough:
13102     if (SvCUR(stuff)) {
13103         PL_expect = XTERM;
13104         if (needargs) {
13105             PL_lex_state = LEX_NORMAL;
13106             start_force(PL_curforce);
13107             NEXTVAL_NEXTTOKE.ival = 0;
13108             force_next(',');
13109         }
13110         else
13111             PL_lex_state = LEX_FORMLINE;
13112         if (!IN_BYTES) {
13113             if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
13114                 SvUTF8_on(stuff);
13115             else if (PL_encoding)
13116                 sv_recode_to_utf8(stuff, PL_encoding);
13117         }
13118         start_force(PL_curforce);
13119         NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
13120         force_next(THING);
13121         start_force(PL_curforce);
13122         NEXTVAL_NEXTTOKE.ival = OP_FORMLINE;
13123         force_next(LSTOP);
13124     }
13125     else {
13126         SvREFCNT_dec(stuff);
13127         if (eofmt)
13128             PL_lex_formbrack = 0;
13129         PL_bufptr = s;
13130     }
13131 #ifdef PERL_MAD
13132     if (PL_madskills) {
13133         if (PL_thistoken)
13134             sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
13135         else
13136             PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
13137         PL_thiswhite = savewhite;
13138     }
13139 #endif
13140     return s;
13141 }
13142
13143 I32
13144 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
13145 {
13146     dVAR;
13147     const I32 oldsavestack_ix = PL_savestack_ix;
13148     CV* const outsidecv = PL_compcv;
13149
13150     if (PL_compcv) {
13151         assert(SvTYPE(PL_compcv) == SVt_PVCV);
13152     }
13153     SAVEI32(PL_subline);
13154     save_item(PL_subname);
13155     SAVESPTR(PL_compcv);
13156
13157     PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
13158     CvFLAGS(PL_compcv) |= flags;
13159
13160     PL_subline = CopLINE(PL_curcop);
13161     CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
13162     CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
13163     CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
13164
13165     return oldsavestack_ix;
13166 }
13167
13168 #ifdef __SC__
13169 #pragma segment Perl_yylex
13170 #endif
13171 static int
13172 S_yywarn(pTHX_ const char *const s)
13173 {
13174     dVAR;
13175
13176     PERL_ARGS_ASSERT_YYWARN;
13177
13178     PL_in_eval |= EVAL_WARNONLY;
13179     yyerror(s);
13180     PL_in_eval &= ~EVAL_WARNONLY;
13181     return 0;
13182 }
13183
13184 int
13185 Perl_yyerror(pTHX_ const char *const s)
13186 {
13187     dVAR;
13188     const char *where = NULL;
13189     const char *context = NULL;
13190     int contlen = -1;
13191     SV *msg;
13192     int yychar  = PL_parser->yychar;
13193
13194     PERL_ARGS_ASSERT_YYERROR;
13195
13196     if (!yychar || (yychar == ';' && !PL_rsfp))
13197         where = "at EOF";
13198     else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
13199       PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
13200       PL_oldbufptr != PL_bufptr) {
13201         /*
13202                 Only for NetWare:
13203                 The code below is removed for NetWare because it abends/crashes on NetWare
13204                 when the script has error such as not having the closing quotes like:
13205                     if ($var eq "value)
13206                 Checking of white spaces is anyway done in NetWare code.
13207         */
13208 #ifndef NETWARE
13209         while (isSPACE(*PL_oldoldbufptr))
13210             PL_oldoldbufptr++;
13211 #endif
13212         context = PL_oldoldbufptr;
13213         contlen = PL_bufptr - PL_oldoldbufptr;
13214     }
13215     else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
13216       PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
13217         /*
13218                 Only for NetWare:
13219                 The code below is removed for NetWare because it abends/crashes on NetWare
13220                 when the script has error such as not having the closing quotes like:
13221                     if ($var eq "value)
13222                 Checking of white spaces is anyway done in NetWare code.
13223         */
13224 #ifndef NETWARE
13225         while (isSPACE(*PL_oldbufptr))
13226             PL_oldbufptr++;
13227 #endif
13228         context = PL_oldbufptr;
13229         contlen = PL_bufptr - PL_oldbufptr;
13230     }
13231     else if (yychar > 255)
13232         where = "next token ???";
13233     else if (yychar == -2) { /* YYEMPTY */
13234         if (PL_lex_state == LEX_NORMAL ||
13235            (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
13236             where = "at end of line";
13237         else if (PL_lex_inpat)
13238             where = "within pattern";
13239         else
13240             where = "within string";
13241     }
13242     else {
13243         SV * const where_sv = newSVpvs_flags("next char ", SVs_TEMP);
13244         if (yychar < 32)
13245             Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
13246         else if (isPRINT_LC(yychar)) {
13247             const char string = yychar;
13248             sv_catpvn(where_sv, &string, 1);
13249         }
13250         else
13251             Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
13252         where = SvPVX_const(where_sv);
13253     }
13254     msg = sv_2mortal(newSVpv(s, 0));
13255     Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
13256         OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
13257     if (context)
13258         Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
13259     else
13260         Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
13261     if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
13262         Perl_sv_catpvf(aTHX_ msg,
13263         "  (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
13264                 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
13265         PL_multi_end = 0;
13266     }
13267     if (PL_in_eval & EVAL_WARNONLY) {
13268         Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
13269     }
13270     else
13271         qerror(msg);
13272     if (PL_error_count >= 10) {
13273         if (PL_in_eval && SvCUR(ERRSV))
13274             Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
13275                        SVfARG(ERRSV), OutCopFILE(PL_curcop));
13276         else
13277             Perl_croak(aTHX_ "%s has too many errors.\n",
13278             OutCopFILE(PL_curcop));
13279     }
13280     PL_in_my = 0;
13281     PL_in_my_stash = NULL;
13282     return 0;
13283 }
13284 #ifdef __SC__
13285 #pragma segment Main
13286 #endif
13287
13288 STATIC char*
13289 S_swallow_bom(pTHX_ U8 *s)
13290 {
13291     dVAR;
13292     const STRLEN slen = SvCUR(PL_linestr);
13293
13294     PERL_ARGS_ASSERT_SWALLOW_BOM;
13295
13296     switch (s[0]) {
13297     case 0xFF:
13298         if (s[1] == 0xFE) {
13299             /* UTF-16 little-endian? (or UTF32-LE?) */
13300             if (s[2] == 0 && s[3] == 0)  /* UTF-32 little-endian */
13301                 Perl_croak(aTHX_ "Unsupported script encoding UTF32-LE");
13302 #ifndef PERL_NO_UTF16_FILTER
13303             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF16-LE script encoding (BOM)\n");
13304             s += 2;
13305             if (PL_bufend > (char*)s) {
13306                 s = add_utf16_textfilter(s, TRUE);
13307             }
13308 #else
13309             Perl_croak(aTHX_ "Unsupported script encoding UTF16-LE");
13310 #endif
13311         }
13312         break;
13313     case 0xFE:
13314         if (s[1] == 0xFF) {   /* UTF-16 big-endian? */
13315 #ifndef PERL_NO_UTF16_FILTER
13316             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
13317             s += 2;
13318             if (PL_bufend > (char *)s) {
13319                 s = add_utf16_textfilter(s, FALSE);
13320             }
13321 #else
13322             Perl_croak(aTHX_ "Unsupported script encoding UTF16-BE");
13323 #endif
13324         }
13325         break;
13326     case 0xEF:
13327         if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
13328             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
13329             s += 3;                      /* UTF-8 */
13330         }
13331         break;
13332     case 0:
13333         if (slen > 3) {
13334              if (s[1] == 0) {
13335                   if (s[2] == 0xFE && s[3] == 0xFF) {
13336                        /* UTF-32 big-endian */
13337                        Perl_croak(aTHX_ "Unsupported script encoding UTF32-BE");
13338                   }
13339              }
13340              else if (s[2] == 0 && s[3] != 0) {
13341                   /* Leading bytes
13342                    * 00 xx 00 xx
13343                    * are a good indicator of UTF-16BE. */
13344                   if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
13345                 s = add_utf16_textfilter(s, FALSE);
13346              }
13347         }
13348 #ifdef EBCDIC
13349     case 0xDD:
13350         if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
13351             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
13352             s += 4;                      /* UTF-8 */
13353         }
13354         break;
13355 #endif
13356
13357     default:
13358          if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
13359                   /* Leading bytes
13360                    * xx 00 xx 00
13361                    * are a good indicator of UTF-16LE. */
13362               if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
13363               s = add_utf16_textfilter(s, TRUE);
13364          }
13365     }
13366     return (char*)s;
13367 }
13368
13369
13370 #ifndef PERL_NO_UTF16_FILTER
13371 static I32
13372 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
13373 {
13374     dVAR;
13375     SV *const filter = FILTER_DATA(idx);
13376     /* We re-use this each time round, throwing the contents away before we
13377        return.  */
13378     SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
13379     SV *const utf8_buffer = filter;
13380     IV status = IoPAGE(filter);
13381     const bool reverse = (bool) IoLINES(filter);
13382     I32 retval;
13383
13384     /* As we're automatically added, at the lowest level, and hence only called
13385        from this file, we can be sure that we're not called in block mode. Hence
13386        don't bother writing code to deal with block mode.  */
13387     if (maxlen) {
13388         Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
13389     }
13390     if (status < 0) {
13391         Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
13392     }
13393     DEBUG_P(PerlIO_printf(Perl_debug_log,
13394                           "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
13395                           FPTR2DPTR(void *, S_utf16_textfilter),
13396                           reverse ? 'l' : 'b', idx, maxlen, status,
13397                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
13398
13399     while (1) {
13400         STRLEN chars;
13401         STRLEN have;
13402         I32 newlen;
13403         U8 *end;
13404         /* First, look in our buffer of existing UTF-8 data:  */
13405         char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
13406
13407         if (nl) {
13408             ++nl;
13409         } else if (status == 0) {
13410             /* EOF */
13411             IoPAGE(filter) = 0;
13412             nl = SvEND(utf8_buffer);
13413         }
13414         if (nl) {
13415             STRLEN got = nl - SvPVX(utf8_buffer);
13416             /* Did we have anything to append?  */
13417             retval = got != 0;
13418             sv_catpvn(sv, SvPVX(utf8_buffer), got);
13419             /* Everything else in this code works just fine if SVp_POK isn't
13420                set.  This, however, needs it, and we need it to work, else
13421                we loop infinitely because the buffer is never consumed.  */
13422             sv_chop(utf8_buffer, nl);
13423             break;
13424         }
13425
13426         /* OK, not a complete line there, so need to read some more UTF-16.
13427            Read an extra octect if the buffer currently has an odd number. */
13428         while (1) {
13429             if (status <= 0)
13430                 break;
13431             if (SvCUR(utf16_buffer) >= 2) {
13432                 /* Location of the high octet of the last complete code point.
13433                    Gosh, UTF-16 is a pain. All the benefits of variable length,
13434                    *coupled* with all the benefits of partial reads and
13435                    endianness.  */
13436                 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
13437                     + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
13438
13439                 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
13440                     break;
13441                 }
13442
13443                 /* We have the first half of a surrogate. Read more.  */
13444                 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
13445             }
13446
13447             status = FILTER_READ(idx + 1, utf16_buffer,
13448                                  160 + (SvCUR(utf16_buffer) & 1));
13449             DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
13450             DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
13451             if (status < 0) {
13452                 /* Error */
13453                 IoPAGE(filter) = status;
13454                 return status;
13455             }
13456         }
13457
13458         chars = SvCUR(utf16_buffer) >> 1;
13459         have = SvCUR(utf8_buffer);
13460         SvGROW(utf8_buffer, have + chars * 3 + 1);
13461
13462         if (reverse) {
13463             end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
13464                                          (U8*)SvPVX_const(utf8_buffer) + have,
13465                                          chars * 2, &newlen);
13466         } else {
13467             end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
13468                                 (U8*)SvPVX_const(utf8_buffer) + have,
13469                                 chars * 2, &newlen);
13470         }
13471         SvCUR_set(utf8_buffer, have + newlen);
13472         *end = '\0';
13473
13474         /* No need to keep this SV "well-formed" with a '\0' after the end, as
13475            it's private to us, and utf16_to_utf8{,reversed} take a
13476            (pointer,length) pair, rather than a NUL-terminated string.  */
13477         if(SvCUR(utf16_buffer) & 1) {
13478             *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
13479             SvCUR_set(utf16_buffer, 1);
13480         } else {
13481             SvCUR_set(utf16_buffer, 0);
13482         }
13483     }
13484     DEBUG_P(PerlIO_printf(Perl_debug_log,
13485                           "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
13486                           status,
13487                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
13488     DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
13489     return retval;
13490 }
13491
13492 static U8 *
13493 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
13494 {
13495     SV *filter = filter_add(S_utf16_textfilter, NULL);
13496
13497     IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
13498     sv_setpvs(filter, "");
13499     IoLINES(filter) = reversed;
13500     IoPAGE(filter) = 1; /* Not EOF */
13501
13502     /* Sadly, we have to return a valid pointer, come what may, so we have to
13503        ignore any error return from this.  */
13504     SvCUR_set(PL_linestr, 0);
13505     if (FILTER_READ(0, PL_linestr, 0)) {
13506         SvUTF8_on(PL_linestr);
13507     } else {
13508         SvUTF8_on(PL_linestr);
13509     }
13510     PL_bufend = SvEND(PL_linestr);
13511     return (U8*)SvPVX(PL_linestr);
13512 }
13513 #endif
13514
13515 /*
13516 Returns a pointer to the next character after the parsed
13517 vstring, as well as updating the passed in sv.
13518
13519 Function must be called like
13520
13521         sv = newSV(5);
13522         s = scan_vstring(s,e,sv);
13523
13524 where s and e are the start and end of the string.
13525 The sv should already be large enough to store the vstring
13526 passed in, for performance reasons.
13527
13528 */
13529
13530 char *
13531 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
13532 {
13533     dVAR;
13534     const char *pos = s;
13535     const char *start = s;
13536
13537     PERL_ARGS_ASSERT_SCAN_VSTRING;
13538
13539     if (*pos == 'v') pos++;  /* get past 'v' */
13540     while (pos < e && (isDIGIT(*pos) || *pos == '_'))
13541         pos++;
13542     if ( *pos != '.') {
13543         /* this may not be a v-string if followed by => */
13544         const char *next = pos;
13545         while (next < e && isSPACE(*next))
13546             ++next;
13547         if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
13548             /* return string not v-string */
13549             sv_setpvn(sv,(char *)s,pos-s);
13550             return (char *)pos;
13551         }
13552     }
13553
13554     if (!isALPHA(*pos)) {
13555         U8 tmpbuf[UTF8_MAXBYTES+1];
13556
13557         if (*s == 'v')
13558             s++;  /* get past 'v' */
13559
13560         sv_setpvs(sv, "");
13561
13562         for (;;) {
13563             /* this is atoi() that tolerates underscores */
13564             U8 *tmpend;
13565             UV rev = 0;
13566             const char *end = pos;
13567             UV mult = 1;
13568             while (--end >= s) {
13569                 if (*end != '_') {
13570                     const UV orev = rev;
13571                     rev += (*end - '0') * mult;
13572                     mult *= 10;
13573                     if (orev > rev)
13574                         Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
13575                                          "Integer overflow in decimal number");
13576                 }
13577             }
13578 #ifdef EBCDIC
13579             if (rev > 0x7FFFFFFF)
13580                  Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
13581 #endif
13582             /* Append native character for the rev point */
13583             tmpend = uvchr_to_utf8(tmpbuf, rev);
13584             sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
13585             if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
13586                  SvUTF8_on(sv);
13587             if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
13588                  s = ++pos;
13589             else {
13590                  s = pos;
13591                  break;
13592             }
13593             while (pos < e && (isDIGIT(*pos) || *pos == '_'))
13594                  pos++;
13595         }
13596         SvPOK_on(sv);
13597         sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
13598         SvRMAGICAL_on(sv);
13599     }
13600     return (char *)s;
13601 }
13602
13603 int
13604 Perl_keyword_plugin_standard(pTHX_
13605         char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
13606 {
13607     PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
13608     PERL_UNUSED_CONTEXT;
13609     PERL_UNUSED_ARG(keyword_ptr);
13610     PERL_UNUSED_ARG(keyword_len);
13611     PERL_UNUSED_ARG(op_ptr);
13612     return KEYWORD_PLUGIN_DECLINE;
13613 }
13614
13615 /*
13616  * Local variables:
13617  * c-indentation-style: bsd
13618  * c-basic-offset: 4
13619  * indent-tabs-mode: t
13620  * End:
13621  *
13622  * ex: set ts=8 sts=4 sw=4 noet:
13623  */