toke.c

   1 /*    toke.c
   2  *
   3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   4  *    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
   5  *
   6  *    You may distribute under the terms of either the GNU General Public
   7  *    License or the Artistic License, as specified in the README file.
   8  *
   9  */
  10
  11 /*
  12  *  'It all comes from here, the stench and the peril.'    --Frodo
  13  *
  14  *     [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
  15  */
  16
  17 /*
  18  * This file is the lexer for Perl.  It's closely linked to the
  19  * parser, perly.y.
  20  *
  21  * The main routine is yylex(), which returns the next token.
  22  */
  23
  24 /*
  25 =head1 Lexer interface
  26
  27 This is the lower layer of the Perl parser, managing characters and tokens.
  28
  29 =for apidoc AmU|yy_parser *|PL_parser
  30
  31 Pointer to a structure encapsulating the state of the parsing operation
  32 currently in progress.  The pointer can be locally changed to perform
  33 a nested parse without interfering with the state of an outer parse.
  34 Individual members of C<PL_parser> have their own documentation.
  35
  36 =cut
  37 */
  38
  39 #include "EXTERN.h"
  40 #define PERL_IN_TOKE_C
  41 #include "perl.h"
  42
  43 #define new_constant(a,b,c,d,e,f,g)     \
  44         S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
  45
  46 #define pl_yylval       (PL_parser->yylval)
  47
  48 /* YYINITDEPTH -- initial size of the parser's stacks.  */
  49 #define YYINITDEPTH 200
  50
  51 /* XXX temporary backwards compatibility */
  52 #define PL_lex_brackets         (PL_parser->lex_brackets)
  53 #define PL_lex_brackstack       (PL_parser->lex_brackstack)
  54 #define PL_lex_casemods         (PL_parser->lex_casemods)
  55 #define PL_lex_casestack        (PL_parser->lex_casestack)
  56 #define PL_lex_defer            (PL_parser->lex_defer)
  57 #define PL_lex_dojoin           (PL_parser->lex_dojoin)
  58 #define PL_lex_expect           (PL_parser->lex_expect)
  59 #define PL_lex_formbrack        (PL_parser->lex_formbrack)
  60 #define PL_lex_inpat            (PL_parser->lex_inpat)
  61 #define PL_lex_inwhat           (PL_parser->lex_inwhat)
  62 #define PL_lex_op               (PL_parser->lex_op)
  63 #define PL_lex_repl             (PL_parser->lex_repl)
  64 #define PL_lex_starts           (PL_parser->lex_starts)
  65 #define PL_lex_stuff            (PL_parser->lex_stuff)
  66 #define PL_multi_start          (PL_parser->multi_start)
  67 #define PL_multi_open           (PL_parser->multi_open)
  68 #define PL_multi_close          (PL_parser->multi_close)
  69 #define PL_pending_ident        (PL_parser->pending_ident)
  70 #define PL_preambled            (PL_parser->preambled)
  71 #define PL_sublex_info          (PL_parser->sublex_info)
  72 #define PL_linestr              (PL_parser->linestr)
  73 #define PL_expect               (PL_parser->expect)
  74 #define PL_copline              (PL_parser->copline)
  75 #define PL_bufptr               (PL_parser->bufptr)
  76 #define PL_oldbufptr            (PL_parser->oldbufptr)
  77 #define PL_oldoldbufptr         (PL_parser->oldoldbufptr)
  78 #define PL_linestart            (PL_parser->linestart)
  79 #define PL_bufend               (PL_parser->bufend)
  80 #define PL_last_uni             (PL_parser->last_uni)
  81 #define PL_last_lop             (PL_parser->last_lop)
  82 #define PL_last_lop_op          (PL_parser->last_lop_op)
  83 #define PL_lex_state            (PL_parser->lex_state)
  84 #define PL_rsfp                 (PL_parser->rsfp)
  85 #define PL_rsfp_filters         (PL_parser->rsfp_filters)
  86 #define PL_in_my                (PL_parser->in_my)
  87 #define PL_in_my_stash          (PL_parser->in_my_stash)
  88 #define PL_tokenbuf             (PL_parser->tokenbuf)
  89 #define PL_multi_end            (PL_parser->multi_end)
  90 #define PL_error_count          (PL_parser->error_count)
  91
  92 #ifdef PERL_MAD
  93 #  define PL_endwhite           (PL_parser->endwhite)
  94 #  define PL_faketokens         (PL_parser->faketokens)
  95 #  define PL_lasttoke           (PL_parser->lasttoke)
  96 #  define PL_nextwhite          (PL_parser->nextwhite)
  97 #  define PL_realtokenstart     (PL_parser->realtokenstart)
  98 #  define PL_skipwhite          (PL_parser->skipwhite)
  99 #  define PL_thisclose          (PL_parser->thisclose)
 100 #  define PL_thismad            (PL_parser->thismad)
 101 #  define PL_thisopen           (PL_parser->thisopen)
 102 #  define PL_thisstuff          (PL_parser->thisstuff)
 103 #  define PL_thistoken          (PL_parser->thistoken)
 104 #  define PL_thiswhite          (PL_parser->thiswhite)
 105 #  define PL_thiswhite          (PL_parser->thiswhite)
 106 #  define PL_nexttoke           (PL_parser->nexttoke)
 107 #  define PL_curforce           (PL_parser->curforce)
 108 #else
 109 #  define PL_nexttoke           (PL_parser->nexttoke)
 110 #  define PL_nexttype           (PL_parser->nexttype)
 111 #  define PL_nextval            (PL_parser->nextval)
 112 #endif
 113
 114 /* This can't be done with embed.fnc, because struct yy_parser contains a
 115    member named pending_ident, which clashes with the generated #define  */
 116 static int
 117 S_pending_ident(pTHX);
 118
 119 static const char ident_too_long[] = "Identifier too long";
 120
 121 #ifdef PERL_MAD
 122 #  define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
 123 #  define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
 124 #else
 125 #  define CURMAD(slot,sv)
 126 #  define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
 127 #endif
 128
 129 #define XFAKEBRACK 128
 130 #define XENUMMASK 127
 131
 132 #ifdef USE_UTF8_SCRIPTS
 133 #   define UTF (!IN_BYTES)
 134 #else
 135 #   define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
 136 #endif
 137
 138 /* The maximum number of characters preceding the unrecognized one to display */
 139 #define UNRECOGNIZED_PRECEDE_COUNT 10
 140
 141 /* In variables named $^X, these are the legal values for X.
 142  * 1999-02-27 mjd-perl-patch@plover.com */
 143 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
 144
 145 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
 146
 147 /* LEX_* are values for PL_lex_state, the state of the lexer.
 148  * They are arranged oddly so that the guard on the switch statement
 149  * can get by with a single comparison (if the compiler is smart enough).
 150  */
 151
 152 /* #define LEX_NOTPARSING               11 is done in perl.h. */
 153
 154 #define LEX_NORMAL              10 /* normal code (ie not within "...")     */
 155 #define LEX_INTERPNORMAL         9 /* code within a string, eg "$foo[$x+1]" */
 156 #define LEX_INTERPCASEMOD        8 /* expecting a \U, \Q or \E etc          */
 157 #define LEX_INTERPPUSH           7 /* starting a new sublex parse level     */
 158 #define LEX_INTERPSTART          6 /* expecting the start of a $var         */
 159
 160                                    /* at end of code, eg "$x" followed by:  */
 161 #define LEX_INTERPEND            5 /* ... eg not one of [, { or ->          */
 162 #define LEX_INTERPENDMAYBE       4 /* ... eg one of [, { or ->              */
 163
 164 #define LEX_INTERPCONCAT         3 /* expecting anything, eg at start of
 165                                         string or after \E, $foo, etc       */
 166 #define LEX_INTERPCONST          2 /* NOT USED */
 167 #define LEX_FORMLINE             1 /* expecting a format line               */
 168 #define LEX_KNOWNEXT             0 /* next token known; just return it      */
 169
 170
 171 #ifdef DEBUGGING
 172 static const char* const lex_state_names[] = {
 173     "KNOWNEXT",
 174     "FORMLINE",
 175     "INTERPCONST",
 176     "INTERPCONCAT",
 177     "INTERPENDMAYBE",
 178     "INTERPEND",
 179     "INTERPSTART",
 180     "INTERPPUSH",
 181     "INTERPCASEMOD",
 182     "INTERPNORMAL",
 183     "NORMAL"
 184 };
 185 #endif
 186
 187 #ifdef ff_next
 188 #undef ff_next
 189 #endif
 190
 191 #include "keywords.h"
 192
 193 /* CLINE is a macro that ensures PL_copline has a sane value */
 194
 195 #ifdef CLINE
 196 #undef CLINE
 197 #endif
 198 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
 199
 200 #ifdef PERL_MAD
 201 #  define SKIPSPACE0(s) skipspace0(s)
 202 #  define SKIPSPACE1(s) skipspace1(s)
 203 #  define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
 204 #  define PEEKSPACE(s) skipspace2(s,0)
 205 #else
 206 #  define SKIPSPACE0(s) skipspace(s)
 207 #  define SKIPSPACE1(s) skipspace(s)
 208 #  define SKIPSPACE2(s,tsv) skipspace(s)
 209 #  define PEEKSPACE(s) skipspace(s)
 210 #endif
 211
 212 /*
 213  * Convenience functions to return different tokens and prime the
 214  * lexer for the next token.  They all take an argument.
 215  *
 216  * TOKEN        : generic token (used for '(', DOLSHARP, etc)
 217  * OPERATOR     : generic operator
 218  * AOPERATOR    : assignment operator
 219  * PREBLOCK     : beginning the block after an if, while, foreach, ...
 220  * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
 221  * PREREF       : *EXPR where EXPR is not a simple identifier
 222  * TERM         : expression term
 223  * LOOPX        : loop exiting command (goto, last, dump, etc)
 224  * FTST         : file test operator
 225  * FUN0         : zero-argument function
 226  * FUN1         : not used, except for not, which isn't a UNIOP
 227  * BOop         : bitwise or or xor
 228  * BAop         : bitwise and
 229  * SHop         : shift operator
 230  * PWop         : power operator
 231  * PMop         : pattern-matching operator
 232  * Aop          : addition-level operator
 233  * Mop          : multiplication-level operator
 234  * Eop          : equality-testing operator
 235  * Rop          : relational operator <= != gt
 236  *
 237  * Also see LOP and lop() below.
 238  */
 239
 240 #ifdef DEBUGGING /* Serve -DT. */
 241 #   define REPORT(retval) tokereport((I32)retval, &pl_yylval)
 242 #else
 243 #   define REPORT(retval) (retval)
 244 #endif
 245
 246 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
 247 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
 248 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
 249 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
 250 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
 251 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
 252 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
 253 #define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
 254 #define FTST(f)  return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
 255 #define FUN0(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
 256 #define FUN1(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
 257 #define BOop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
 258 #define BAop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
 259 #define SHop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
 260 #define PWop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
 261 #define PMop(f)  return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
 262 #define Aop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
 263 #define Mop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
 264 #define Eop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
 265 #define Rop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
 266
 267 /* This bit of chicanery makes a unary function followed by
 268  * a parenthesis into a function with one argument, highest precedence.
 269  * The UNIDOR macro is for unary functions that can be followed by the //
 270  * operator (such as C<shift // 0>).
 271  */
 272 #define UNI2(f,x) { \
 273         pl_yylval.ival = f; \
 274         PL_expect = x; \
 275         PL_bufptr = s; \
 276         PL_last_uni = PL_oldbufptr; \
 277         PL_last_lop_op = f; \
 278         if (*s == '(') \
 279             return REPORT( (int)FUNC1 ); \
 280         s = PEEKSPACE(s); \
 281         return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
 282         }
 283 #define UNI(f)    UNI2(f,XTERM)
 284 #define UNIDOR(f) UNI2(f,XTERMORDORDOR)
 285
 286 #define UNIBRACK(f) { \
 287         pl_yylval.ival = f; \
 288         PL_bufptr = s; \
 289         PL_last_uni = PL_oldbufptr; \
 290         if (*s == '(') \
 291             return REPORT( (int)FUNC1 ); \
 292         s = PEEKSPACE(s); \
 293         return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
 294         }
 295
 296 /* grandfather return to old style */
 297 #define OLDLOP(f) return(pl_yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
 298
 299 #ifdef DEBUGGING
 300
 301 /* how to interpret the pl_yylval associated with the token */
 302 enum token_type {
 303     TOKENTYPE_NONE,
 304     TOKENTYPE_IVAL,
 305     TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
 306     TOKENTYPE_PVAL,
 307     TOKENTYPE_OPVAL,
 308     TOKENTYPE_GVVAL
 309 };
 310
 311 static struct debug_tokens {
 312     const int token;
 313     enum token_type type;
 314     const char *name;
 315 } const debug_tokens[] =
 316 {
 317     { ADDOP,            TOKENTYPE_OPNUM,        "ADDOP" },
 318     { ANDAND,           TOKENTYPE_NONE,         "ANDAND" },
 319     { ANDOP,            TOKENTYPE_NONE,         "ANDOP" },
 320     { ANONSUB,          TOKENTYPE_IVAL,         "ANONSUB" },
 321     { ARROW,            TOKENTYPE_NONE,         "ARROW" },
 322     { ASSIGNOP,         TOKENTYPE_OPNUM,        "ASSIGNOP" },
 323     { BITANDOP,         TOKENTYPE_OPNUM,        "BITANDOP" },
 324     { BITOROP,          TOKENTYPE_OPNUM,        "BITOROP" },
 325     { COLONATTR,        TOKENTYPE_NONE,         "COLONATTR" },
 326     { CONTINUE,         TOKENTYPE_NONE,         "CONTINUE" },
 327     { DEFAULT,          TOKENTYPE_NONE,         "DEFAULT" },
 328     { DO,               TOKENTYPE_NONE,         "DO" },
 329     { DOLSHARP,         TOKENTYPE_NONE,         "DOLSHARP" },
 330     { DORDOR,           TOKENTYPE_NONE,         "DORDOR" },
 331     { DOROP,            TOKENTYPE_OPNUM,        "DOROP" },
 332     { DOTDOT,           TOKENTYPE_IVAL,         "DOTDOT" },
 333     { ELSE,             TOKENTYPE_NONE,         "ELSE" },
 334     { ELSIF,            TOKENTYPE_IVAL,         "ELSIF" },
 335     { EQOP,             TOKENTYPE_OPNUM,        "EQOP" },
 336     { FOR,              TOKENTYPE_IVAL,         "FOR" },
 337     { FORMAT,           TOKENTYPE_NONE,         "FORMAT" },
 338     { FUNC,             TOKENTYPE_OPNUM,        "FUNC" },
 339     { FUNC0,            TOKENTYPE_OPNUM,        "FUNC0" },
 340     { FUNC0SUB,         TOKENTYPE_OPVAL,        "FUNC0SUB" },
 341     { FUNC1,            TOKENTYPE_OPNUM,        "FUNC1" },
 342     { FUNCMETH,         TOKENTYPE_OPVAL,        "FUNCMETH" },
 343     { GIVEN,            TOKENTYPE_IVAL,         "GIVEN" },
 344     { HASHBRACK,        TOKENTYPE_NONE,         "HASHBRACK" },
 345     { IF,               TOKENTYPE_IVAL,         "IF" },
 346     { LABEL,            TOKENTYPE_PVAL,         "LABEL" },
 347     { LOCAL,            TOKENTYPE_IVAL,         "LOCAL" },
 348     { LOOPEX,           TOKENTYPE_OPNUM,        "LOOPEX" },
 349     { LSTOP,            TOKENTYPE_OPNUM,        "LSTOP" },
 350     { LSTOPSUB,         TOKENTYPE_OPVAL,        "LSTOPSUB" },
 351     { MATCHOP,          TOKENTYPE_OPNUM,        "MATCHOP" },
 352     { METHOD,           TOKENTYPE_OPVAL,        "METHOD" },
 353     { MULOP,            TOKENTYPE_OPNUM,        "MULOP" },
 354     { MY,               TOKENTYPE_IVAL,         "MY" },
 355     { MYSUB,            TOKENTYPE_NONE,         "MYSUB" },
 356     { NOAMP,            TOKENTYPE_NONE,         "NOAMP" },
 357     { NOTOP,            TOKENTYPE_NONE,         "NOTOP" },
 358     { OROP,             TOKENTYPE_IVAL,         "OROP" },
 359     { OROR,             TOKENTYPE_NONE,         "OROR" },
 360     { PACKAGE,          TOKENTYPE_NONE,         "PACKAGE" },
 361     { PLUGEXPR,         TOKENTYPE_OPVAL,        "PLUGEXPR" },
 362     { PLUGSTMT,         TOKENTYPE_OPVAL,        "PLUGSTMT" },
 363     { PMFUNC,           TOKENTYPE_OPVAL,        "PMFUNC" },
 364     { POSTDEC,          TOKENTYPE_NONE,         "POSTDEC" },
 365     { POSTINC,          TOKENTYPE_NONE,         "POSTINC" },
 366     { POWOP,            TOKENTYPE_OPNUM,        "POWOP" },
 367     { PREDEC,           TOKENTYPE_NONE,         "PREDEC" },
 368     { PREINC,           TOKENTYPE_NONE,         "PREINC" },
 369     { PRIVATEREF,       TOKENTYPE_OPVAL,        "PRIVATEREF" },
 370     { REFGEN,           TOKENTYPE_NONE,         "REFGEN" },
 371     { RELOP,            TOKENTYPE_OPNUM,        "RELOP" },
 372     { SHIFTOP,          TOKENTYPE_OPNUM,        "SHIFTOP" },
 373     { SUB,              TOKENTYPE_NONE,         "SUB" },
 374     { THING,            TOKENTYPE_OPVAL,        "THING" },
 375     { UMINUS,           TOKENTYPE_NONE,         "UMINUS" },
 376     { UNIOP,            TOKENTYPE_OPNUM,        "UNIOP" },
 377     { UNIOPSUB,         TOKENTYPE_OPVAL,        "UNIOPSUB" },
 378     { UNLESS,           TOKENTYPE_IVAL,         "UNLESS" },
 379     { UNTIL,            TOKENTYPE_IVAL,         "UNTIL" },
 380     { USE,              TOKENTYPE_IVAL,         "USE" },
 381     { WHEN,             TOKENTYPE_IVAL,         "WHEN" },
 382     { WHILE,            TOKENTYPE_IVAL,         "WHILE" },
 383     { WORD,             TOKENTYPE_OPVAL,        "WORD" },
 384     { YADAYADA,         TOKENTYPE_IVAL,         "YADAYADA" },
 385     { 0,                TOKENTYPE_NONE,         NULL }
 386 };
 387
 388 /* dump the returned token in rv, plus any optional arg in pl_yylval */
 389
 390 STATIC int
 391 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
 392 {
 393     dVAR;
 394
 395     PERL_ARGS_ASSERT_TOKEREPORT;
 396
 397     if (DEBUG_T_TEST) {
 398         const char *name = NULL;
 399         enum token_type type = TOKENTYPE_NONE;
 400         const struct debug_tokens *p;
 401         SV* const report = newSVpvs("<== ");
 402
 403         for (p = debug_tokens; p->token; p++) {
 404             if (p->token == (int)rv) {
 405                 name = p->name;
 406                 type = p->type;
 407                 break;
 408             }
 409         }
 410         if (name)
 411             Perl_sv_catpv(aTHX_ report, name);
 412         else if ((char)rv > ' ' && (char)rv < '~')
 413             Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
 414         else if (!rv)
 415             sv_catpvs(report, "EOF");
 416         else
 417             Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
 418         switch (type) {
 419         case TOKENTYPE_NONE:
 420         case TOKENTYPE_GVVAL: /* doesn't appear to be used */
 421             break;
 422         case TOKENTYPE_IVAL:
 423             Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
 424             break;
 425         case TOKENTYPE_OPNUM:
 426             Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
 427                                     PL_op_name[lvalp->ival]);
 428             break;
 429         case TOKENTYPE_PVAL:
 430             Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
 431             break;
 432         case TOKENTYPE_OPVAL:
 433             if (lvalp->opval) {
 434                 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
 435                                     PL_op_name[lvalp->opval->op_type]);
 436                 if (lvalp->opval->op_type == OP_CONST) {
 437                     Perl_sv_catpvf(aTHX_ report, " %s",
 438                         SvPEEK(cSVOPx_sv(lvalp->opval)));
 439                 }
 440
 441             }
 442             else
 443                 sv_catpvs(report, "(opval=null)");
 444             break;
 445         }
 446         PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
 447     };
 448     return (int)rv;
 449 }
 450
 451
 452 /* print the buffer with suitable escapes */
 453
 454 STATIC void
 455 S_printbuf(pTHX_ const char *const fmt, const char *const s)
 456 {
 457     SV* const tmp = newSVpvs("");
 458
 459     PERL_ARGS_ASSERT_PRINTBUF;
 460
 461     PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
 462     SvREFCNT_dec(tmp);
 463 }
 464
 465 #endif
 466
 467 static int
 468 S_deprecate_commaless_var_list(pTHX) {
 469     PL_expect = XTERM;
 470     deprecate("comma-less variable list");
 471     return REPORT(','); /* grandfather non-comma-format format */
 472 }
 473
 474 /*
 475  * S_ao
 476  *
 477  * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
 478  * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
 479  */
 480
 481 STATIC int
 482 S_ao(pTHX_ int toketype)
 483 {
 484     dVAR;
 485     if (*PL_bufptr == '=') {
 486         PL_bufptr++;
 487         if (toketype == ANDAND)
 488             pl_yylval.ival = OP_ANDASSIGN;
 489         else if (toketype == OROR)
 490             pl_yylval.ival = OP_ORASSIGN;
 491         else if (toketype == DORDOR)
 492             pl_yylval.ival = OP_DORASSIGN;
 493         toketype = ASSIGNOP;
 494     }
 495     return toketype;
 496 }
 497
 498 /*
 499  * S_no_op
 500  * When Perl expects an operator and finds something else, no_op
 501  * prints the warning.  It always prints "<something> found where
 502  * operator expected.  It prints "Missing semicolon on previous line?"
 503  * if the surprise occurs at the start of the line.  "do you need to
 504  * predeclare ..." is printed out for code like "sub bar; foo bar $x"
 505  * where the compiler doesn't know if foo is a method call or a function.
 506  * It prints "Missing operator before end of line" if there's nothing
 507  * after the missing operator, or "... before <...>" if there is something
 508  * after the missing operator.
 509  */
 510
 511 STATIC void
 512 S_no_op(pTHX_ const char *const what, char *s)
 513 {
 514     dVAR;
 515     char * const oldbp = PL_bufptr;
 516     const bool is_first = (PL_oldbufptr == PL_linestart);
 517
 518     PERL_ARGS_ASSERT_NO_OP;
 519
 520     if (!s)
 521         s = oldbp;
 522     else
 523         PL_bufptr = s;
 524     yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
 525     if (ckWARN_d(WARN_SYNTAX)) {
 526         if (is_first)
 527             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 528                     "\t(Missing semicolon on previous line?)\n");
 529         else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
 530             const char *t;
 531             for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
 532                 NOOP;
 533             if (t < PL_bufptr && isSPACE(*t))
 534                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 535                         "\t(Do you need to predeclare %.*s?)\n",
 536                     (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
 537         }
 538         else {
 539             assert(s >= oldbp);
 540             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 541                     "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
 542         }
 543     }
 544     PL_bufptr = oldbp;
 545 }
 546
 547 /*
 548  * S_missingterm
 549  * Complain about missing quote/regexp/heredoc terminator.
 550  * If it's called with NULL then it cauterizes the line buffer.
 551  * If we're in a delimited string and the delimiter is a control
 552  * character, it's reformatted into a two-char sequence like ^C.
 553  * This is fatal.
 554  */
 555
 556 STATIC void
 557 S_missingterm(pTHX_ char *s)
 558 {
 559     dVAR;
 560     char tmpbuf[3];
 561     char q;
 562     if (s) {
 563         char * const nl = strrchr(s,'\n');
 564         if (nl)
 565             *nl = '\0';
 566     }
 567     else if (isCNTRL(PL_multi_close)) {
 568         *tmpbuf = '^';
 569         tmpbuf[1] = (char)toCTRL(PL_multi_close);
 570         tmpbuf[2] = '\0';
 571         s = tmpbuf;
 572     }
 573     else {
 574         *tmpbuf = (char)PL_multi_close;
 575         tmpbuf[1] = '\0';
 576         s = tmpbuf;
 577     }
 578     q = strchr(s,'"') ? '\'' : '"';
 579     Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
 580 }
 581
 582 #define FEATURE_IS_ENABLED(name)                                        \
 583         ((0 != (PL_hints & HINT_LOCALIZE_HH))                           \
 584             && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
 585 /* The longest string we pass in.  */
 586 #define MAX_FEATURE_LEN (sizeof("unicode_strings")-1)
 587
 588 /*
 589  * S_feature_is_enabled
 590  * Check whether the named feature is enabled.
 591  */
 592 STATIC bool
 593 S_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
 594 {
 595     dVAR;
 596     HV * const hinthv = GvHV(PL_hintgv);
 597     char he_name[8 + MAX_FEATURE_LEN] = "feature_";
 598
 599     PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
 600
 601     assert(namelen <= MAX_FEATURE_LEN);
 602     memcpy(&he_name[8], name, namelen);
 603
 604     return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
 605 }
 606
 607 /*
 608  * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
 609  * utf16-to-utf8-reversed.
 610  */
 611
 612 #ifdef PERL_CR_FILTER
 613 static void
 614 strip_return(SV *sv)
 615 {
 616     register const char *s = SvPVX_const(sv);
 617     register const char * const e = s + SvCUR(sv);
 618
 619     PERL_ARGS_ASSERT_STRIP_RETURN;
 620
 621     /* outer loop optimized to do nothing if there are no CR-LFs */
 622     while (s < e) {
 623         if (*s++ == '\r' && *s == '\n') {
 624             /* hit a CR-LF, need to copy the rest */
 625             register char *d = s - 1;
 626             *d++ = *s++;
 627             while (s < e) {
 628                 if (*s == '\r' && s[1] == '\n')
 629                     s++;
 630                 *d++ = *s++;
 631             }
 632             SvCUR(sv) -= s - d;
 633             return;
 634         }
 635     }
 636 }
 637
 638 STATIC I32
 639 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
 640 {
 641     const I32 count = FILTER_READ(idx+1, sv, maxlen);
 642     if (count > 0 && !maxlen)
 643         strip_return(sv);
 644     return count;
 645 }
 646 #endif
 647
 648
 649
 650 /*
 651  * Perl_lex_start
 652  *
 653  * Create a parser object and initialise its parser and lexer fields
 654  *
 655  * rsfp       is the opened file handle to read from (if any),
 656  *
 657  * line       holds any initial content already read from the file (or in
 658  *            the case of no file, such as an eval, the whole contents);
 659  *
 660  * new_filter indicates that this is a new file and it shouldn't inherit
 661  *            the filters from the current parser (ie require).
 662  */
 663
 664 void
 665 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, bool new_filter)
 666 {
 667     dVAR;
 668     const char *s = NULL;
 669     STRLEN len;
 670     yy_parser *parser, *oparser;
 671
 672     /* create and initialise a parser */
 673
 674     Newxz(parser, 1, yy_parser);
 675     parser->old_parser = oparser = PL_parser;
 676     PL_parser = parser;
 677
 678     Newx(parser->stack, YYINITDEPTH, yy_stack_frame);
 679     parser->ps = parser->stack;
 680     parser->stack_size = YYINITDEPTH;
 681
 682     parser->stack->state = 0;
 683     parser->yyerrstatus = 0;
 684     parser->yychar = YYEMPTY;           /* Cause a token to be read.  */
 685
 686     /* on scope exit, free this parser and restore any outer one */
 687     SAVEPARSER(parser);
 688     parser->saved_curcop = PL_curcop;
 689
 690     /* initialise lexer state */
 691
 692 #ifdef PERL_MAD
 693     parser->curforce = -1;
 694 #else
 695     parser->nexttoke = 0;
 696 #endif
 697     parser->error_count = oparser ? oparser->error_count : 0;
 698     parser->copline = NOLINE;
 699     parser->lex_state = LEX_NORMAL;
 700     parser->expect = XSTATE;
 701     parser->rsfp = rsfp;
 702     parser->rsfp_filters = (new_filter || !oparser) ? newAV()
 703                 : MUTABLE_AV(SvREFCNT_inc(oparser->rsfp_filters));
 704
 705     Newx(parser->lex_brackstack, 120, char);
 706     Newx(parser->lex_casestack, 12, char);
 707     *parser->lex_casestack = '\0';
 708
 709     if (line) {
 710         s = SvPV_const(line, len);
 711     } else {
 712         len = 0;
 713     }
 714
 715     if (!len) {
 716         parser->linestr = newSVpvs("\n;");
 717     } else if (SvREADONLY(line) || s[len-1] != ';') {
 718         parser->linestr = newSVsv(line);
 719         if (s[len-1] != ';')
 720             sv_catpvs(parser->linestr, "\n;");
 721     } else {
 722         SvTEMP_off(line);
 723         SvREFCNT_inc_simple_void_NN(line);
 724         parser->linestr = line;
 725     }
 726     parser->oldoldbufptr =
 727         parser->oldbufptr =
 728         parser->bufptr =
 729         parser->linestart = SvPVX(parser->linestr);
 730     parser->bufend = parser->bufptr + SvCUR(parser->linestr);
 731     parser->last_lop = parser->last_uni = NULL;
 732 }
 733
 734
 735 /* delete a parser object */
 736
 737 void
 738 Perl_parser_free(pTHX_  const yy_parser *parser)
 739 {
 740     PERL_ARGS_ASSERT_PARSER_FREE;
 741
 742     PL_curcop = parser->saved_curcop;
 743     SvREFCNT_dec(parser->linestr);
 744
 745     if (parser->rsfp == PerlIO_stdin())
 746         PerlIO_clearerr(parser->rsfp);
 747     else if (parser->rsfp && (!parser->old_parser ||
 748                 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
 749         PerlIO_close(parser->rsfp);
 750     SvREFCNT_dec(parser->rsfp_filters);
 751
 752     Safefree(parser->stack);
 753     Safefree(parser->lex_brackstack);
 754     Safefree(parser->lex_casestack);
 755     PL_parser = parser->old_parser;
 756     Safefree(parser);
 757 }
 758
 759
 760 /*
 761  * Perl_lex_end
 762  * Finalizer for lexing operations.  Must be called when the parser is
 763  * done with the lexer.
 764  */
 765
 766 void
 767 Perl_lex_end(pTHX)
 768 {
 769     dVAR;
 770     PL_doextract = FALSE;
 771 }
 772
 773 /*
 774 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
 775
 776 Buffer scalar containing the chunk currently under consideration of the
 777 text currently being lexed.  This is always a plain string scalar (for
 778 which C<SvPOK> is true).  It is not intended to be used as a scalar by
 779 normal scalar means; instead refer to the buffer directly by the pointer
 780 variables described below.
 781
 782 The lexer maintains various C<char*> pointers to things in the
 783 C<PL_parser-E<gt>linestr> buffer.  If C<PL_parser-E<gt>linestr> is ever
 784 reallocated, all of these pointers must be updated.  Don't attempt to
 785 do this manually, but rather use L</lex_grow_linestr> if you need to
 786 reallocate the buffer.
 787
 788 The content of the text chunk in the buffer is commonly exactly one
 789 complete line of input, up to and including a newline terminator,
 790 but there are situations where it is otherwise.  The octets of the
 791 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
 792 The function L</lex_bufutf8> tells you which.  Do not use the C<SvUTF8>
 793 flag on this scalar, which may disagree with it.
 794
 795 For direct examination of the buffer, the variable
 796 L</PL_parser-E<gt>bufend> points to the end of the buffer.  The current
 797 lexing position is pointed to by L</PL_parser-E<gt>bufptr>.  Direct use
 798 of these pointers is usually preferable to examination of the scalar
 799 through normal scalar means.
 800
 801 =for apidoc AmxU|char *|PL_parser-E<gt>bufend
 802
 803 Direct pointer to the end of the chunk of text currently being lexed, the
 804 end of the lexer buffer.  This is equal to C<SvPVX(PL_parser-E<gt>linestr)
 805 + SvCUR(PL_parser-E<gt>linestr)>.  A NUL character (zero octet) is
 806 always located at the end of the buffer, and does not count as part of
 807 the buffer's contents.
 808
 809 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr
 810
 811 Points to the current position of lexing inside the lexer buffer.
 812 Characters around this point may be freely examined, within
 813 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
 814 L</PL_parser-E<gt>bufend>.  The octets of the buffer may be intended to be
 815 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
 816
 817 Lexing code (whether in the Perl core or not) moves this pointer past
 818 the characters that it consumes.  It is also expected to perform some
 819 bookkeeping whenever a newline character is consumed.  This movement
 820 can be more conveniently performed by the function L</lex_read_to>,
 821 which handles newlines appropriately.
 822
 823 Interpretation of the buffer's octets can be abstracted out by
 824 using the slightly higher-level functions L</lex_peek_unichar> and
 825 L</lex_read_unichar>.
 826
 827 =for apidoc AmxU|char *|PL_parser-E<gt>linestart
 828
 829 Points to the start of the current line inside the lexer buffer.
 830 This is useful for indicating at which column an error occurred, and
 831 not much else.  This must be updated by any lexing code that consumes
 832 a newline; the function L</lex_read_to> handles this detail.
 833
 834 =cut
 835 */
 836
 837 /*
 838 =for apidoc Amx|bool|lex_bufutf8
 839
 840 Indicates whether the octets in the lexer buffer
 841 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
 842 of Unicode characters.  If not, they should be interpreted as Latin-1
 843 characters.  This is analogous to the C<SvUTF8> flag for scalars.
 844
 845 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
 846 contains valid UTF-8.  Lexing code must be robust in the face of invalid
 847 encoding.
 848
 849 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
 850 is significant, but not the whole story regarding the input character
 851 encoding.  Normally, when a file is being read, the scalar contains octets
 852 and its C<SvUTF8> flag is off, but the octets should be interpreted as
 853 UTF-8 if the C<use utf8> pragma is in effect.  During a string eval,
 854 however, the scalar may have the C<SvUTF8> flag on, and in this case its
 855 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
 856 is in effect.  This logic may change in the future; use this function
 857 instead of implementing the logic yourself.
 858
 859 =cut
 860 */
 861
 862 bool
 863 Perl_lex_bufutf8(pTHX)
 864 {
 865     return UTF;
 866 }
 867
 868 /*
 869 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
 870
 871 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
 872 at least I<len> octets (including terminating NUL).  Returns a
 873 pointer to the reallocated buffer.  This is necessary before making
 874 any direct modification of the buffer that would increase its length.
 875 L</lex_stuff_pvn> provides a more convenient way to insert text into
 876 the buffer.
 877
 878 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
 879 this function updates all of the lexer's variables that point directly
 880 into the buffer.
 881
 882 =cut
 883 */
 884
 885 char *
 886 Perl_lex_grow_linestr(pTHX_ STRLEN len)
 887 {
 888     SV *linestr;
 889     char *buf;
 890     STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
 891     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
 892     linestr = PL_parser->linestr;
 893     buf = SvPVX(linestr);
 894     if (len <= SvLEN(linestr))
 895         return buf;
 896     bufend_pos = PL_parser->bufend - buf;
 897     bufptr_pos = PL_parser->bufptr - buf;
 898     oldbufptr_pos = PL_parser->oldbufptr - buf;
 899     oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
 900     linestart_pos = PL_parser->linestart - buf;
 901     last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
 902     last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
 903     buf = sv_grow(linestr, len);
 904     PL_parser->bufend = buf + bufend_pos;
 905     PL_parser->bufptr = buf + bufptr_pos;
 906     PL_parser->oldbufptr = buf + oldbufptr_pos;
 907     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
 908     PL_parser->linestart = buf + linestart_pos;
 909     if (PL_parser->last_uni)
 910         PL_parser->last_uni = buf + last_uni_pos;
 911     if (PL_parser->last_lop)
 912         PL_parser->last_lop = buf + last_lop_pos;
 913     return buf;
 914 }
 915
 916 /*
 917 =for apidoc Amx|void|lex_stuff_pvn|char *pv|STRLEN len|U32 flags
 918
 919 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
 920 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
 921 reallocating the buffer if necessary.  This means that lexing code that
 922 runs later will see the characters as if they had appeared in the input.
 923 It is not recommended to do this as part of normal parsing, and most
 924 uses of this facility run the risk of the inserted characters being
 925 interpreted in an unintended manner.
 926
 927 The string to be inserted is represented by I<len> octets starting
 928 at I<pv>.  These octets are interpreted as either UTF-8 or Latin-1,
 929 according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
 930 The characters are recoded for the lexer buffer, according to how the
 931 buffer is currently being interpreted (L</lex_bufutf8>).  If a string
 932 to be interpreted is available as a Perl scalar, the L</lex_stuff_sv>
 933 function is more convenient.
 934
 935 =cut
 936 */
 937
 938 void
 939 Perl_lex_stuff_pvn(pTHX_ char *pv, STRLEN len, U32 flags)
 940 {
 941     char *bufptr;
 942     PERL_ARGS_ASSERT_LEX_STUFF_PVN;
 943     if (flags & ~(LEX_STUFF_UTF8))
 944         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
 945     if (UTF) {
 946         if (flags & LEX_STUFF_UTF8) {
 947             goto plain_copy;
 948         } else {
 949             STRLEN highhalf = 0;
 950             char *p, *e = pv+len;
 951             for (p = pv; p != e; p++)
 952                 highhalf += !!(((U8)*p) & 0x80);
 953             if (!highhalf)
 954                 goto plain_copy;
 955             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
 956             bufptr = PL_parser->bufptr;
 957             Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
 958             PL_parser->bufend += len+highhalf;
 959             for (p = pv; p != e; p++) {
 960                 U8 c = (U8)*p;
 961                 if (c & 0x80) {
 962                     *bufptr++ = (char)(0xc0 | (c >> 6));
 963                     *bufptr++ = (char)(0x80 | (c & 0x3f));
 964                 } else {
 965                     *bufptr++ = (char)c;
 966                 }
 967             }
 968         }
 969     } else {
 970         if (flags & LEX_STUFF_UTF8) {
 971             STRLEN highhalf = 0;
 972             char *p, *e = pv+len;
 973             for (p = pv; p != e; p++) {
 974                 U8 c = (U8)*p;
 975                 if (c >= 0xc4) {
 976                     Perl_croak(aTHX_ "Lexing code attempted to stuff "
 977                                 "non-Latin-1 character into Latin-1 input");
 978                 } else if (c >= 0xc2 && p+1 != e &&
 979                             (((U8)p[1]) & 0xc0) == 0x80) {
 980                     p++;
 981                     highhalf++;
 982                 } else if (c >= 0x80) {
 983                     /* malformed UTF-8 */
 984                     ENTER;
 985                     SAVESPTR(PL_warnhook);
 986                     PL_warnhook = PERL_WARNHOOK_FATAL;
 987                     utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
 988                     LEAVE;
 989                 }
 990             }
 991             if (!highhalf)
 992                 goto plain_copy;
 993             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
 994             bufptr = PL_parser->bufptr;
 995             Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
 996             PL_parser->bufend += len-highhalf;
 997             for (p = pv; p != e; p++) {
 998                 U8 c = (U8)*p;
 999                 if (c & 0x80) {
1000                     *bufptr++ = (char)(((c & 0x3) << 6) | (p[1] & 0x3f));
1001                     p++;
1002                 } else {
1003                     *bufptr++ = (char)c;
1004                 }
1005             }
1006         } else {
1007             plain_copy:
1008             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1009             bufptr = PL_parser->bufptr;
1010             Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1011             PL_parser->bufend += len;
1012             Copy(pv, bufptr, len, char);
1013         }
1014     }
1015 }
1016
1017 /*
1018 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1019
1020 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1021 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1022 reallocating the buffer if necessary.  This means that lexing code that
1023 runs later will see the characters as if they had appeared in the input.
1024 It is not recommended to do this as part of normal parsing, and most
1025 uses of this facility run the risk of the inserted characters being
1026 interpreted in an unintended manner.
1027
1028 The string to be inserted is the string value of I<sv>.  The characters
1029 are recoded for the lexer buffer, according to how the buffer is currently
1030 being interpreted (L</lex_bufutf8>).  If a string to be interpreted is
1031 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1032 need to construct a scalar.
1033
1034 =cut
1035 */
1036
1037 void
1038 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1039 {
1040     char *pv;
1041     STRLEN len;
1042     PERL_ARGS_ASSERT_LEX_STUFF_SV;
1043     if (flags)
1044         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1045     pv = SvPV(sv, len);
1046     lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1047 }
1048
1049 /*
1050 =for apidoc Amx|void|lex_unstuff|char *ptr
1051
1052 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1053 I<ptr>.  Text following I<ptr> will be moved, and the buffer shortened.
1054 This hides the discarded text from any lexing code that runs later,
1055 as if the text had never appeared.
1056
1057 This is not the normal way to consume lexed text.  For that, use
1058 L</lex_read_to>.
1059
1060 =cut
1061 */
1062
1063 void
1064 Perl_lex_unstuff(pTHX_ char *ptr)
1065 {
1066     char *buf, *bufend;
1067     STRLEN unstuff_len;
1068     PERL_ARGS_ASSERT_LEX_UNSTUFF;
1069     buf = PL_parser->bufptr;
1070     if (ptr < buf)
1071         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1072     if (ptr == buf)
1073         return;
1074     bufend = PL_parser->bufend;
1075     if (ptr > bufend)
1076         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1077     unstuff_len = ptr - buf;
1078     Move(ptr, buf, bufend+1-ptr, char);
1079     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1080     PL_parser->bufend = bufend - unstuff_len;
1081 }
1082
1083 /*
1084 =for apidoc Amx|void|lex_read_to|char *ptr
1085
1086 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1087 to I<ptr>.  This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1088 performing the correct bookkeeping whenever a newline character is passed.
1089 This is the normal way to consume lexed text.
1090
1091 Interpretation of the buffer's octets can be abstracted out by
1092 using the slightly higher-level functions L</lex_peek_unichar> and
1093 L</lex_read_unichar>.
1094
1095 =cut
1096 */
1097
1098 void
1099 Perl_lex_read_to(pTHX_ char *ptr)
1100 {
1101     char *s;
1102     PERL_ARGS_ASSERT_LEX_READ_TO;
1103     s = PL_parser->bufptr;
1104     if (ptr < s || ptr > PL_parser->bufend)
1105         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1106     for (; s != ptr; s++)
1107         if (*s == '\n') {
1108             CopLINE_inc(PL_curcop);
1109             PL_parser->linestart = s+1;
1110         }
1111     PL_parser->bufptr = ptr;
1112 }
1113
1114 /*
1115 =for apidoc Amx|void|lex_discard_to|char *ptr
1116
1117 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1118 up to I<ptr>.  The remaining content of the buffer will be moved, and
1119 all pointers into the buffer updated appropriately.  I<ptr> must not
1120 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1121 it is not permitted to discard text that has yet to be lexed.
1122
1123 Normally it is not necessarily to do this directly, because it suffices to
1124 use the implicit discarding behaviour of L</lex_next_chunk> and things
1125 based on it.  However, if a token stretches across multiple lines,
1126 and the lexing code has kept multiple lines of text in the buffer fof
1127 that purpose, then after completion of the token it would be wise to
1128 explicitly discard the now-unneeded earlier lines, to avoid future
1129 multi-line tokens growing the buffer without bound.
1130
1131 =cut
1132 */
1133
1134 void
1135 Perl_lex_discard_to(pTHX_ char *ptr)
1136 {
1137     char *buf;
1138     STRLEN discard_len;
1139     PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1140     buf = SvPVX(PL_parser->linestr);
1141     if (ptr < buf)
1142         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1143     if (ptr == buf)
1144         return;
1145     if (ptr > PL_parser->bufptr)
1146         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1147     discard_len = ptr - buf;
1148     if (PL_parser->oldbufptr < ptr)
1149         PL_parser->oldbufptr = ptr;
1150     if (PL_parser->oldoldbufptr < ptr)
1151         PL_parser->oldoldbufptr = ptr;
1152     if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1153         PL_parser->last_uni = NULL;
1154     if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1155         PL_parser->last_lop = NULL;
1156     Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1157     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1158     PL_parser->bufend -= discard_len;
1159     PL_parser->bufptr -= discard_len;
1160     PL_parser->oldbufptr -= discard_len;
1161     PL_parser->oldoldbufptr -= discard_len;
1162     if (PL_parser->last_uni)
1163         PL_parser->last_uni -= discard_len;
1164     if (PL_parser->last_lop)
1165         PL_parser->last_lop -= discard_len;
1166 }
1167
1168 /*
1169 =for apidoc Amx|bool|lex_next_chunk|U32 flags
1170
1171 Reads in the next chunk of text to be lexed, appending it to
1172 L</PL_parser-E<gt>linestr>.  This should be called when lexing code has
1173 looked to the end of the current chunk and wants to know more.  It is
1174 usual, but not necessary, for lexing to have consumed the entirety of
1175 the current chunk at this time.
1176
1177 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1178 chunk (i.e., the current chunk has been entirely consumed), normally the
1179 current chunk will be discarded at the same time that the new chunk is
1180 read in.  If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1181 will not be discarded.  If the current chunk has not been entirely
1182 consumed, then it will not be discarded regardless of the flag.
1183
1184 Returns true if some new text was added to the buffer, or false if the
1185 buffer has reached the end of the input text.
1186
1187 =cut
1188 */
1189
1190 #define LEX_FAKE_EOF 0x80000000
1191
1192 bool
1193 Perl_lex_next_chunk(pTHX_ U32 flags)
1194 {
1195     SV *linestr;
1196     char *buf;
1197     STRLEN old_bufend_pos, new_bufend_pos;
1198     STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1199     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1200     bool got_some_for_debugger = 0;
1201     bool got_some;
1202     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF))
1203         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1204     linestr = PL_parser->linestr;
1205     buf = SvPVX(linestr);
1206     if (!(flags & LEX_KEEP_PREVIOUS) &&
1207             PL_parser->bufptr == PL_parser->bufend) {
1208         old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1209         linestart_pos = 0;
1210         if (PL_parser->last_uni != PL_parser->bufend)
1211             PL_parser->last_uni = NULL;
1212         if (PL_parser->last_lop != PL_parser->bufend)
1213             PL_parser->last_lop = NULL;
1214         last_uni_pos = last_lop_pos = 0;
1215         *buf = 0;
1216         SvCUR(linestr) = 0;
1217     } else {
1218         old_bufend_pos = PL_parser->bufend - buf;
1219         bufptr_pos = PL_parser->bufptr - buf;
1220         oldbufptr_pos = PL_parser->oldbufptr - buf;
1221         oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1222         linestart_pos = PL_parser->linestart - buf;
1223         last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1224         last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1225     }
1226     if (flags & LEX_FAKE_EOF) {
1227         goto eof;
1228     } else if (!PL_parser->rsfp) {
1229         got_some = 0;
1230     } else if (filter_gets(linestr, old_bufend_pos)) {
1231         got_some = 1;
1232         got_some_for_debugger = 1;
1233     } else {
1234         if (!SvPOK(linestr))   /* can get undefined by filter_gets */
1235             sv_setpvs(linestr, "");
1236         eof:
1237         /* End of real input.  Close filehandle (unless it was STDIN),
1238          * then add implicit termination.
1239          */
1240         if ((PerlIO*)PL_parser->rsfp == PerlIO_stdin())
1241             PerlIO_clearerr(PL_parser->rsfp);
1242         else if (PL_parser->rsfp)
1243             (void)PerlIO_close(PL_parser->rsfp);
1244         PL_parser->rsfp = NULL;
1245         PL_doextract = FALSE;
1246 #ifdef PERL_MAD
1247         if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1248             PL_faketokens = 1;
1249 #endif
1250         if (!PL_in_eval && PL_minus_p) {
1251             sv_catpvs(linestr,
1252                 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1253             PL_minus_n = PL_minus_p = 0;
1254         } else if (!PL_in_eval && PL_minus_n) {
1255             sv_catpvs(linestr, /*{*/";}");
1256             PL_minus_n = 0;
1257         } else
1258             sv_catpvs(linestr, ";");
1259         got_some = 1;
1260     }
1261     buf = SvPVX(linestr);
1262     new_bufend_pos = SvCUR(linestr);
1263     PL_parser->bufend = buf + new_bufend_pos;
1264     PL_parser->bufptr = buf + bufptr_pos;
1265     PL_parser->oldbufptr = buf + oldbufptr_pos;
1266     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1267     PL_parser->linestart = buf + linestart_pos;
1268     if (PL_parser->last_uni)
1269         PL_parser->last_uni = buf + last_uni_pos;
1270     if (PL_parser->last_lop)
1271         PL_parser->last_lop = buf + last_lop_pos;
1272     if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
1273             PL_curstash != PL_debstash) {
1274         /* debugger active and we're not compiling the debugger code,
1275          * so store the line into the debugger's array of lines
1276          */
1277         update_debugger_info(NULL, buf+old_bufend_pos,
1278             new_bufend_pos-old_bufend_pos);
1279     }
1280     return got_some;
1281 }
1282
1283 /*
1284 =for apidoc Amx|I32|lex_peek_unichar|U32 flags
1285
1286 Looks ahead one (Unicode) character in the text currently being lexed.
1287 Returns the codepoint (unsigned integer value) of the next character,
1288 or -1 if lexing has reached the end of the input text.  To consume the
1289 peeked character, use L</lex_read_unichar>.
1290
1291 If the next character is in (or extends into) the next chunk of input
1292 text, the next chunk will be read in.  Normally the current chunk will be
1293 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1294 then the current chunk will not be discarded.
1295
1296 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1297 is encountered, an exception is generated.
1298
1299 =cut
1300 */
1301
1302 I32
1303 Perl_lex_peek_unichar(pTHX_ U32 flags)
1304 {
1305     char *s, *bufend;
1306     if (flags & ~(LEX_KEEP_PREVIOUS))
1307         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1308     s = PL_parser->bufptr;
1309     bufend = PL_parser->bufend;
1310     if (UTF) {
1311         U8 head;
1312         I32 unichar;
1313         STRLEN len, retlen;
1314         if (s == bufend) {
1315             if (!lex_next_chunk(flags))
1316                 return -1;
1317             s = PL_parser->bufptr;
1318             bufend = PL_parser->bufend;
1319         }
1320         head = (U8)*s;
1321         if (!(head & 0x80))
1322             return head;
1323         if (head & 0x40) {
1324             len = PL_utf8skip[head];
1325             while ((STRLEN)(bufend-s) < len) {
1326                 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1327                     break;
1328                 s = PL_parser->bufptr;
1329                 bufend = PL_parser->bufend;
1330             }
1331         }
1332         unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1333         if (retlen == (STRLEN)-1) {
1334             /* malformed UTF-8 */
1335             ENTER;
1336             SAVESPTR(PL_warnhook);
1337             PL_warnhook = PERL_WARNHOOK_FATAL;
1338             utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1339             LEAVE;
1340         }
1341         return unichar;
1342     } else {
1343         if (s == bufend) {
1344             if (!lex_next_chunk(flags))
1345                 return -1;
1346             s = PL_parser->bufptr;
1347         }
1348         return (U8)*s;
1349     }
1350 }
1351
1352 /*
1353 =for apidoc Amx|I32|lex_read_unichar|U32 flags
1354
1355 Reads the next (Unicode) character in the text currently being lexed.
1356 Returns the codepoint (unsigned integer value) of the character read,
1357 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1358 if lexing has reached the end of the input text.  To non-destructively
1359 examine the next character, use L</lex_peek_unichar> instead.
1360
1361 If the next character is in (or extends into) the next chunk of input
1362 text, the next chunk will be read in.  Normally the current chunk will be
1363 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1364 then the current chunk will not be discarded.
1365
1366 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1367 is encountered, an exception is generated.
1368
1369 =cut
1370 */
1371
1372 I32
1373 Perl_lex_read_unichar(pTHX_ U32 flags)
1374 {
1375     I32 c;
1376     if (flags & ~(LEX_KEEP_PREVIOUS))
1377         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1378     c = lex_peek_unichar(flags);
1379     if (c != -1) {
1380         if (c == '\n')
1381             CopLINE_inc(PL_curcop);
1382         PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1383     }
1384     return c;
1385 }
1386
1387 /*
1388 =for apidoc Amx|void|lex_read_space|U32 flags
1389
1390 Reads optional spaces, in Perl style, in the text currently being
1391 lexed.  The spaces may include ordinary whitespace characters and
1392 Perl-style comments.  C<#line> directives are processed if encountered.
1393 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1394 at a non-space character (or the end of the input text).
1395
1396 If spaces extend into the next chunk of input text, the next chunk will
1397 be read in.  Normally the current chunk will be discarded at the same
1398 time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1399 chunk will not be discarded.
1400
1401 =cut
1402 */
1403
1404 #define LEX_NO_NEXT_CHUNK 0x80000000
1405
1406 void
1407 Perl_lex_read_space(pTHX_ U32 flags)
1408 {
1409     char *s, *bufend;
1410     bool need_incline = 0;
1411     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK))
1412         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1413 #ifdef PERL_MAD
1414     if (PL_skipwhite) {
1415         sv_free(PL_skipwhite);
1416         PL_skipwhite = NULL;
1417     }
1418     if (PL_madskills)
1419         PL_skipwhite = newSVpvs("");
1420 #endif /* PERL_MAD */
1421     s = PL_parser->bufptr;
1422     bufend = PL_parser->bufend;
1423     while (1) {
1424         char c = *s;
1425         if (c == '#') {
1426             do {
1427                 c = *++s;
1428             } while (!(c == '\n' || (c == 0 && s == bufend)));
1429         } else if (c == '\n') {
1430             s++;
1431             PL_parser->linestart = s;
1432             if (s == bufend)
1433                 need_incline = 1;
1434             else
1435                 incline(s);
1436         } else if (isSPACE(c)) {
1437             s++;
1438         } else if (c == 0 && s == bufend) {
1439             bool got_more;
1440 #ifdef PERL_MAD
1441             if (PL_madskills)
1442                 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1443 #endif /* PERL_MAD */
1444             if (flags & LEX_NO_NEXT_CHUNK)
1445                 break;
1446             PL_parser->bufptr = s;
1447             CopLINE_inc(PL_curcop);
1448             got_more = lex_next_chunk(flags);
1449             CopLINE_dec(PL_curcop);
1450             s = PL_parser->bufptr;
1451             bufend = PL_parser->bufend;
1452             if (!got_more)
1453                 break;
1454             if (need_incline && PL_parser->rsfp) {
1455                 incline(s);
1456                 need_incline = 0;
1457             }
1458         } else {
1459             break;
1460         }
1461     }
1462 #ifdef PERL_MAD
1463     if (PL_madskills)
1464         sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1465 #endif /* PERL_MAD */
1466     PL_parser->bufptr = s;
1467 }
1468
1469 /*
1470  * S_incline
1471  * This subroutine has nothing to do with tilting, whether at windmills
1472  * or pinball tables.  Its name is short for "increment line".  It
1473  * increments the current line number in CopLINE(PL_curcop) and checks
1474  * to see whether the line starts with a comment of the form
1475  *    # line 500 "foo.pm"
1476  * If so, it sets the current line number and file to the values in the comment.
1477  */
1478
1479 STATIC void
1480 S_incline(pTHX_ const char *s)
1481 {
1482     dVAR;
1483     const char *t;
1484     const char *n;
1485     const char *e;
1486
1487     PERL_ARGS_ASSERT_INCLINE;
1488
1489     CopLINE_inc(PL_curcop);
1490     if (*s++ != '#')
1491         return;
1492     while (SPACE_OR_TAB(*s))
1493         s++;
1494     if (strnEQ(s, "line", 4))
1495         s += 4;
1496     else
1497         return;
1498     if (SPACE_OR_TAB(*s))
1499         s++;
1500     else
1501         return;
1502     while (SPACE_OR_TAB(*s))
1503         s++;
1504     if (!isDIGIT(*s))
1505         return;
1506
1507     n = s;
1508     while (isDIGIT(*s))
1509         s++;
1510     if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1511         return;
1512     while (SPACE_OR_TAB(*s))
1513         s++;
1514     if (*s == '"' && (t = strchr(s+1, '"'))) {
1515         s++;
1516         e = t + 1;
1517     }
1518     else {
1519         t = s;
1520         while (!isSPACE(*t))
1521             t++;
1522         e = t;
1523     }
1524     while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1525         e++;
1526     if (*e != '\n' && *e != '\0')
1527         return;         /* false alarm */
1528
1529     if (t - s > 0) {
1530         const STRLEN len = t - s;
1531 #ifndef USE_ITHREADS
1532         SV *const temp_sv = CopFILESV(PL_curcop);
1533         const char *cf;
1534         STRLEN tmplen;
1535
1536         if (temp_sv) {
1537             cf = SvPVX(temp_sv);
1538             tmplen = SvCUR(temp_sv);
1539         } else {
1540             cf = NULL;
1541             tmplen = 0;
1542         }
1543
1544         if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
1545             /* must copy *{"::_<(eval N)[oldfilename:L]"}
1546              * to *{"::_<newfilename"} */
1547             /* However, the long form of evals is only turned on by the
1548                debugger - usually they're "(eval %lu)" */
1549             char smallbuf[128];
1550             char *tmpbuf;
1551             GV **gvp;
1552             STRLEN tmplen2 = len;
1553             if (tmplen + 2 <= sizeof smallbuf)
1554                 tmpbuf = smallbuf;
1555             else
1556                 Newx(tmpbuf, tmplen + 2, char);
1557             tmpbuf[0] = '_';
1558             tmpbuf[1] = '<';
1559             memcpy(tmpbuf + 2, cf, tmplen);
1560             tmplen += 2;
1561             gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
1562             if (gvp) {
1563                 char *tmpbuf2;
1564                 GV *gv2;
1565
1566                 if (tmplen2 + 2 <= sizeof smallbuf)
1567                     tmpbuf2 = smallbuf;
1568                 else
1569                     Newx(tmpbuf2, tmplen2 + 2, char);
1570
1571                 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
1572                     /* Either they malloc'd it, or we malloc'd it,
1573                        so no prefix is present in ours.  */
1574                     tmpbuf2[0] = '_';
1575                     tmpbuf2[1] = '<';
1576                 }
1577
1578                 memcpy(tmpbuf2 + 2, s, tmplen2);
1579                 tmplen2 += 2;
1580
1581                 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1582                 if (!isGV(gv2)) {
1583                     gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1584                     /* adjust ${"::_<newfilename"} to store the new file name */
1585                     GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1586                     GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
1587                     GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
1588                 }
1589
1590                 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1591             }
1592             if (tmpbuf != smallbuf) Safefree(tmpbuf);
1593         }
1594 #endif
1595         CopFILE_free(PL_curcop);
1596         CopFILE_setn(PL_curcop, s, len);
1597     }
1598     CopLINE_set(PL_curcop, atoi(n)-1);
1599 }
1600
1601 #ifdef PERL_MAD
1602 /* skip space before PL_thistoken */
1603
1604 STATIC char *
1605 S_skipspace0(pTHX_ register char *s)
1606 {
1607     PERL_ARGS_ASSERT_SKIPSPACE0;
1608
1609     s = skipspace(s);
1610     if (!PL_madskills)
1611         return s;
1612     if (PL_skipwhite) {
1613         if (!PL_thiswhite)
1614             PL_thiswhite = newSVpvs("");
1615         sv_catsv(PL_thiswhite, PL_skipwhite);
1616         sv_free(PL_skipwhite);
1617         PL_skipwhite = 0;
1618     }
1619     PL_realtokenstart = s - SvPVX(PL_linestr);
1620     return s;
1621 }
1622
1623 /* skip space after PL_thistoken */
1624
1625 STATIC char *
1626 S_skipspace1(pTHX_ register char *s)
1627 {
1628     const char *start = s;
1629     I32 startoff = start - SvPVX(PL_linestr);
1630
1631     PERL_ARGS_ASSERT_SKIPSPACE1;
1632
1633     s = skipspace(s);
1634     if (!PL_madskills)
1635         return s;
1636     start = SvPVX(PL_linestr) + startoff;
1637     if (!PL_thistoken && PL_realtokenstart >= 0) {
1638         const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1639         PL_thistoken = newSVpvn(tstart, start - tstart);
1640     }
1641     PL_realtokenstart = -1;
1642     if (PL_skipwhite) {
1643         if (!PL_nextwhite)
1644             PL_nextwhite = newSVpvs("");
1645         sv_catsv(PL_nextwhite, PL_skipwhite);
1646         sv_free(PL_skipwhite);
1647         PL_skipwhite = 0;
1648     }
1649     return s;
1650 }
1651
1652 STATIC char *
1653 S_skipspace2(pTHX_ register char *s, SV **svp)
1654 {
1655     char *start;
1656     const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
1657     const I32 startoff = s - SvPVX(PL_linestr);
1658
1659     PERL_ARGS_ASSERT_SKIPSPACE2;
1660
1661     s = skipspace(s);
1662     PL_bufptr = SvPVX(PL_linestr) + bufptroff;
1663     if (!PL_madskills || !svp)
1664         return s;
1665     start = SvPVX(PL_linestr) + startoff;
1666     if (!PL_thistoken && PL_realtokenstart >= 0) {
1667         char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1668         PL_thistoken = newSVpvn(tstart, start - tstart);
1669         PL_realtokenstart = -1;
1670     }
1671     if (PL_skipwhite) {
1672         if (!*svp)
1673             *svp = newSVpvs("");
1674         sv_setsv(*svp, PL_skipwhite);
1675         sv_free(PL_skipwhite);
1676         PL_skipwhite = 0;
1677     }
1678
1679     return s;
1680 }
1681 #endif
1682
1683 STATIC void
1684 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1685 {
1686     AV *av = CopFILEAVx(PL_curcop);
1687     if (av) {
1688         SV * const sv = newSV_type(SVt_PVMG);
1689         if (orig_sv)
1690             sv_setsv(sv, orig_sv);
1691         else
1692             sv_setpvn(sv, buf, len);
1693         (void)SvIOK_on(sv);
1694         SvIV_set(sv, 0);
1695         av_store(av, (I32)CopLINE(PL_curcop), sv);
1696     }
1697 }
1698
1699 /*
1700  * S_skipspace
1701  * Called to gobble the appropriate amount and type of whitespace.
1702  * Skips comments as well.
1703  */
1704
1705 STATIC char *
1706 S_skipspace(pTHX_ register char *s)
1707 {
1708 #ifdef PERL_MAD
1709     char *start = s;
1710 #endif /* PERL_MAD */
1711     PERL_ARGS_ASSERT_SKIPSPACE;
1712 #ifdef PERL_MAD
1713     if (PL_skipwhite) {
1714         sv_free(PL_skipwhite);
1715         PL_skipwhite = NULL;
1716     }
1717 #endif /* PERL_MAD */
1718     if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1719         while (s < PL_bufend && SPACE_OR_TAB(*s))
1720             s++;
1721     } else {
1722         STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1723         PL_bufptr = s;
1724         lex_read_space(LEX_KEEP_PREVIOUS |
1725                 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1726                     LEX_NO_NEXT_CHUNK : 0));
1727         s = PL_bufptr;
1728         PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1729         if (PL_linestart > PL_bufptr)
1730             PL_bufptr = PL_linestart;
1731         return s;
1732     }
1733 #ifdef PERL_MAD
1734     if (PL_madskills)
1735         PL_skipwhite = newSVpvn(start, s-start);
1736 #endif /* PERL_MAD */
1737     return s;
1738 }
1739
1740 /*
1741  * S_check_uni
1742  * Check the unary operators to ensure there's no ambiguity in how they're
1743  * used.  An ambiguous piece of code would be:
1744  *     rand + 5
1745  * This doesn't mean rand() + 5.  Because rand() is a unary operator,
1746  * the +5 is its argument.
1747  */
1748
1749 STATIC void
1750 S_check_uni(pTHX)
1751 {
1752     dVAR;
1753     const char *s;
1754     const char *t;
1755
1756     if (PL_oldoldbufptr != PL_last_uni)
1757         return;
1758     while (isSPACE(*PL_last_uni))
1759         PL_last_uni++;
1760     s = PL_last_uni;
1761     while (isALNUM_lazy_if(s,UTF) || *s == '-')
1762         s++;
1763     if ((t = strchr(s, '(')) && t < PL_bufptr)
1764         return;
1765
1766     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1767                      "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1768                      (int)(s - PL_last_uni), PL_last_uni);
1769 }
1770
1771 /*
1772  * LOP : macro to build a list operator.  Its behaviour has been replaced
1773  * with a subroutine, S_lop() for which LOP is just another name.
1774  */
1775
1776 #define LOP(f,x) return lop(f,x,s)
1777
1778 /*
1779  * S_lop
1780  * Build a list operator (or something that might be one).  The rules:
1781  *  - if we have a next token, then it's a list operator [why?]
1782  *  - if the next thing is an opening paren, then it's a function
1783  *  - else it's a list operator
1784  */
1785
1786 STATIC I32
1787 S_lop(pTHX_ I32 f, int x, char *s)
1788 {
1789     dVAR;
1790
1791     PERL_ARGS_ASSERT_LOP;
1792
1793     pl_yylval.ival = f;
1794     CLINE;
1795     PL_expect = x;
1796     PL_bufptr = s;
1797     PL_last_lop = PL_oldbufptr;
1798     PL_last_lop_op = (OPCODE)f;
1799 #ifdef PERL_MAD
1800     if (PL_lasttoke)
1801         return REPORT(LSTOP);
1802 #else
1803     if (PL_nexttoke)
1804         return REPORT(LSTOP);
1805 #endif
1806     if (*s == '(')
1807         return REPORT(FUNC);
1808     s = PEEKSPACE(s);
1809     if (*s == '(')
1810         return REPORT(FUNC);
1811     else
1812         return REPORT(LSTOP);
1813 }
1814
1815 #ifdef PERL_MAD
1816  /*
1817  * S_start_force
1818  * Sets up for an eventual force_next().  start_force(0) basically does
1819  * an unshift, while start_force(-1) does a push.  yylex removes items
1820  * on the "pop" end.
1821  */
1822
1823 STATIC void
1824 S_start_force(pTHX_ int where)
1825 {
1826     int i;
1827
1828     if (where < 0)      /* so people can duplicate start_force(PL_curforce) */
1829         where = PL_lasttoke;
1830     assert(PL_curforce < 0 || PL_curforce == where);
1831     if (PL_curforce != where) {
1832         for (i = PL_lasttoke; i > where; --i) {
1833             PL_nexttoke[i] = PL_nexttoke[i-1];
1834         }
1835         PL_lasttoke++;
1836     }
1837     if (PL_curforce < 0)        /* in case of duplicate start_force() */
1838         Zero(&PL_nexttoke[where], 1, NEXTTOKE);
1839     PL_curforce = where;
1840     if (PL_nextwhite) {
1841         if (PL_madskills)
1842             curmad('^', newSVpvs(""));
1843         CURMAD('_', PL_nextwhite);
1844     }
1845 }
1846
1847 STATIC void
1848 S_curmad(pTHX_ char slot, SV *sv)
1849 {
1850     MADPROP **where;
1851
1852     if (!sv)
1853         return;
1854     if (PL_curforce < 0)
1855         where = &PL_thismad;
1856     else
1857         where = &PL_nexttoke[PL_curforce].next_mad;
1858
1859     if (PL_faketokens)
1860         sv_setpvs(sv, "");
1861     else {
1862         if (!IN_BYTES) {
1863             if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1864                 SvUTF8_on(sv);
1865             else if (PL_encoding) {
1866                 sv_recode_to_utf8(sv, PL_encoding);
1867             }
1868         }
1869     }
1870
1871     /* keep a slot open for the head of the list? */
1872     if (slot != '_' && *where && (*where)->mad_key == '^') {
1873         (*where)->mad_key = slot;
1874         sv_free(MUTABLE_SV(((*where)->mad_val)));
1875         (*where)->mad_val = (void*)sv;
1876     }
1877     else
1878         addmad(newMADsv(slot, sv), where, 0);
1879 }
1880 #else
1881 #  define start_force(where)    NOOP
1882 #  define curmad(slot, sv)      NOOP
1883 #endif
1884
1885 /*
1886  * S_force_next
1887  * When the lexer realizes it knows the next token (for instance,
1888  * it is reordering tokens for the parser) then it can call S_force_next
1889  * to know what token to return the next time the lexer is called.  Caller
1890  * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1891  * and possibly PL_expect to ensure the lexer handles the token correctly.
1892  */
1893
1894 STATIC void
1895 S_force_next(pTHX_ I32 type)
1896 {
1897     dVAR;
1898 #ifdef DEBUGGING
1899     if (DEBUG_T_TEST) {
1900         PerlIO_printf(Perl_debug_log, "### forced token:\n");
1901         tokereport(type, &NEXTVAL_NEXTTOKE);
1902     }
1903 #endif
1904 #ifdef PERL_MAD
1905     if (PL_curforce < 0)
1906         start_force(PL_lasttoke);
1907     PL_nexttoke[PL_curforce].next_type = type;
1908     if (PL_lex_state != LEX_KNOWNEXT)
1909         PL_lex_defer = PL_lex_state;
1910     PL_lex_state = LEX_KNOWNEXT;
1911     PL_lex_expect = PL_expect;
1912     PL_curforce = -1;
1913 #else
1914     PL_nexttype[PL_nexttoke] = type;
1915     PL_nexttoke++;
1916     if (PL_lex_state != LEX_KNOWNEXT) {
1917         PL_lex_defer = PL_lex_state;
1918         PL_lex_expect = PL_expect;
1919         PL_lex_state = LEX_KNOWNEXT;
1920     }
1921 #endif
1922 }
1923
1924 STATIC SV *
1925 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
1926 {
1927     dVAR;
1928     SV * const sv = newSVpvn_utf8(start, len,
1929                                   !IN_BYTES
1930                                   && UTF
1931                                   && !is_ascii_string((const U8*)start, len)
1932                                   && is_utf8_string((const U8*)start, len));
1933     return sv;
1934 }
1935
1936 /*
1937  * S_force_word
1938  * When the lexer knows the next thing is a word (for instance, it has
1939  * just seen -> and it knows that the next char is a word char, then
1940  * it calls S_force_word to stick the next word into the PL_nexttoke/val
1941  * lookahead.
1942  *
1943  * Arguments:
1944  *   char *start : buffer position (must be within PL_linestr)
1945  *   int token   : PL_next* will be this type of bare word (e.g., METHOD,WORD)
1946  *   int check_keyword : if true, Perl checks to make sure the word isn't
1947  *       a keyword (do this if the word is a label, e.g. goto FOO)
1948  *   int allow_pack : if true, : characters will also be allowed (require,
1949  *       use, etc. do this)
1950  *   int allow_initial_tick : used by the "sub" lexer only.
1951  */
1952
1953 STATIC char *
1954 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
1955 {
1956     dVAR;
1957     register char *s;
1958     STRLEN len;
1959
1960     PERL_ARGS_ASSERT_FORCE_WORD;
1961
1962     start = SKIPSPACE1(start);
1963     s = start;
1964     if (isIDFIRST_lazy_if(s,UTF) ||
1965         (allow_pack && *s == ':') ||
1966         (allow_initial_tick && *s == '\'') )
1967     {
1968         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
1969         if (check_keyword && keyword(PL_tokenbuf, len, 0))
1970             return start;
1971         start_force(PL_curforce);
1972         if (PL_madskills)
1973             curmad('X', newSVpvn(start,s-start));
1974         if (token == METHOD) {
1975             s = SKIPSPACE1(s);
1976             if (*s == '(')
1977                 PL_expect = XTERM;
1978             else {
1979                 PL_expect = XOPERATOR;
1980             }
1981         }
1982         if (PL_madskills)
1983             curmad('g', newSVpvs( "forced" ));
1984         NEXTVAL_NEXTTOKE.opval
1985             = (OP*)newSVOP(OP_CONST,0,
1986                            S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
1987         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
1988         force_next(token);
1989     }
1990     return s;
1991 }
1992
1993 /*
1994  * S_force_ident
1995  * Called when the lexer wants $foo *foo &foo etc, but the program
1996  * text only contains the "foo" portion.  The first argument is a pointer
1997  * to the "foo", and the second argument is the type symbol to prefix.
1998  * Forces the next token to be a "WORD".
1999  * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2000  */
2001
2002 STATIC void
2003 S_force_ident(pTHX_ register const char *s, int kind)
2004 {
2005     dVAR;
2006
2007     PERL_ARGS_ASSERT_FORCE_IDENT;
2008
2009     if (*s) {
2010         const STRLEN len = strlen(s);
2011         OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
2012         start_force(PL_curforce);
2013         NEXTVAL_NEXTTOKE.opval = o;
2014         force_next(WORD);
2015         if (kind) {
2016             o->op_private = OPpCONST_ENTERED;
2017             /* XXX see note in pp_entereval() for why we forgo typo
2018                warnings if the symbol must be introduced in an eval.
2019                GSAR 96-10-12 */
2020             gv_fetchpvn_flags(s, len,
2021                               PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2022                               : GV_ADD,
2023                               kind == '$' ? SVt_PV :
2024                               kind == '@' ? SVt_PVAV :
2025                               kind == '%' ? SVt_PVHV :
2026                               SVt_PVGV
2027                               );
2028         }
2029     }
2030 }
2031
2032 NV
2033 Perl_str_to_version(pTHX_ SV *sv)
2034 {
2035     NV retval = 0.0;
2036     NV nshift = 1.0;
2037     STRLEN len;
2038     const char *start = SvPV_const(sv,len);
2039     const char * const end = start + len;
2040     const bool utf = SvUTF8(sv) ? TRUE : FALSE;
2041
2042     PERL_ARGS_ASSERT_STR_TO_VERSION;
2043
2044     while (start < end) {
2045         STRLEN skip;
2046         UV n;
2047         if (utf)
2048             n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2049         else {
2050             n = *(U8*)start;
2051             skip = 1;
2052         }
2053         retval += ((NV)n)/nshift;
2054         start += skip;
2055         nshift *= 1000;
2056     }
2057     return retval;
2058 }
2059
2060 /*
2061  * S_force_version
2062  * Forces the next token to be a version number.
2063  * If the next token appears to be an invalid version number, (e.g. "v2b"),
2064  * and if "guessing" is TRUE, then no new token is created (and the caller
2065  * must use an alternative parsing method).
2066  */
2067
2068 STATIC char *
2069 S_force_version(pTHX_ char *s, int guessing)
2070 {
2071     dVAR;
2072     OP *version = NULL;
2073     char *d;
2074 #ifdef PERL_MAD
2075     I32 startoff = s - SvPVX(PL_linestr);
2076 #endif
2077
2078     PERL_ARGS_ASSERT_FORCE_VERSION;
2079
2080     s = SKIPSPACE1(s);
2081
2082     d = s;
2083     if (*d == 'v')
2084         d++;
2085     if (isDIGIT(*d)) {
2086         while (isDIGIT(*d) || *d == '_' || *d == '.')
2087             d++;
2088 #ifdef PERL_MAD
2089         if (PL_madskills) {
2090             start_force(PL_curforce);
2091             curmad('X', newSVpvn(s,d-s));
2092         }
2093 #endif
2094         if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
2095             SV *ver;
2096             s = scan_num(s, &pl_yylval);
2097             version = pl_yylval.opval;
2098             ver = cSVOPx(version)->op_sv;
2099             if (SvPOK(ver) && !SvNIOK(ver)) {
2100                 SvUPGRADE(ver, SVt_PVNV);
2101                 SvNV_set(ver, str_to_version(ver));
2102                 SvNOK_on(ver);          /* hint that it is a version */
2103             }
2104         }
2105         else if (guessing) {
2106 #ifdef PERL_MAD
2107             if (PL_madskills) {
2108                 sv_free(PL_nextwhite);  /* let next token collect whitespace */
2109                 PL_nextwhite = 0;
2110                 s = SvPVX(PL_linestr) + startoff;
2111             }
2112 #endif
2113             return s;
2114         }
2115     }
2116
2117 #ifdef PERL_MAD
2118     if (PL_madskills && !version) {
2119         sv_free(PL_nextwhite);  /* let next token collect whitespace */
2120         PL_nextwhite = 0;
2121         s = SvPVX(PL_linestr) + startoff;
2122     }
2123 #endif
2124     /* NOTE: The parser sees the package name and the VERSION swapped */
2125     start_force(PL_curforce);
2126     NEXTVAL_NEXTTOKE.opval = version;
2127     force_next(WORD);
2128
2129     return s;
2130 }
2131
2132 /*
2133  * S_tokeq
2134  * Tokenize a quoted string passed in as an SV.  It finds the next
2135  * chunk, up to end of string or a backslash.  It may make a new
2136  * SV containing that chunk (if HINT_NEW_STRING is on).  It also
2137  * turns \\ into \.
2138  */
2139
2140 STATIC SV *
2141 S_tokeq(pTHX_ SV *sv)
2142 {
2143     dVAR;
2144     register char *s;
2145     register char *send;
2146     register char *d;
2147     STRLEN len = 0;
2148     SV *pv = sv;
2149
2150     PERL_ARGS_ASSERT_TOKEQ;
2151
2152     if (!SvLEN(sv))
2153         goto finish;
2154
2155     s = SvPV_force(sv, len);
2156     if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
2157         goto finish;
2158     send = s + len;
2159     while (s < send && *s != '\\')
2160         s++;
2161     if (s == send)
2162         goto finish;
2163     d = s;
2164     if ( PL_hints & HINT_NEW_STRING ) {
2165         pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
2166     }
2167     while (s < send) {
2168         if (*s == '\\') {
2169             if (s + 1 < send && (s[1] == '\\'))
2170                 s++;            /* all that, just for this */
2171         }
2172         *d++ = *s++;
2173     }
2174     *d = '\0';
2175     SvCUR_set(sv, d - SvPVX_const(sv));
2176   finish:
2177     if ( PL_hints & HINT_NEW_STRING )
2178        return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2179     return sv;
2180 }
2181
2182 /*
2183  * Now come three functions related to double-quote context,
2184  * S_sublex_start, S_sublex_push, and S_sublex_done.  They're used when
2185  * converting things like "\u\Lgnat" into ucfirst(lc("gnat")).  They
2186  * interact with PL_lex_state, and create fake ( ... ) argument lists
2187  * to handle functions and concatenation.
2188  * They assume that whoever calls them will be setting up a fake
2189  * join call, because each subthing puts a ',' after it.  This lets
2190  *   "lower \luPpEr"
2191  * become
2192  *  join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
2193  *
2194  * (I'm not sure whether the spurious commas at the end of lcfirst's
2195  * arguments and join's arguments are created or not).
2196  */
2197
2198 /*
2199  * S_sublex_start
2200  * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2201  *
2202  * Pattern matching will set PL_lex_op to the pattern-matching op to
2203  * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2204  *
2205  * OP_CONST and OP_READLINE are easy--just make the new op and return.
2206  *
2207  * Everything else becomes a FUNC.
2208  *
2209  * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2210  * had an OP_CONST or OP_READLINE).  This just sets us up for a
2211  * call to S_sublex_push().
2212  */
2213
2214 STATIC I32
2215 S_sublex_start(pTHX)
2216 {
2217     dVAR;
2218     register const I32 op_type = pl_yylval.ival;
2219
2220     if (op_type == OP_NULL) {
2221         pl_yylval.opval = PL_lex_op;
2222         PL_lex_op = NULL;
2223         return THING;
2224     }
2225     if (op_type == OP_CONST || op_type == OP_READLINE) {
2226         SV *sv = tokeq(PL_lex_stuff);
2227
2228         if (SvTYPE(sv) == SVt_PVIV) {
2229             /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2230             STRLEN len;
2231             const char * const p = SvPV_const(sv, len);
2232             SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2233             SvREFCNT_dec(sv);
2234             sv = nsv;
2235         }
2236         pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
2237         PL_lex_stuff = NULL;
2238         /* Allow <FH> // "foo" */
2239         if (op_type == OP_READLINE)
2240             PL_expect = XTERMORDORDOR;
2241         return THING;
2242     }
2243     else if (op_type == OP_BACKTICK && PL_lex_op) {
2244         /* readpipe() vas overriden */
2245         cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
2246         pl_yylval.opval = PL_lex_op;
2247         PL_lex_op = NULL;
2248         PL_lex_stuff = NULL;
2249         return THING;
2250     }
2251
2252     PL_sublex_info.super_state = PL_lex_state;
2253     PL_sublex_info.sub_inwhat = (U16)op_type;
2254     PL_sublex_info.sub_op = PL_lex_op;
2255     PL_lex_state = LEX_INTERPPUSH;
2256
2257     PL_expect = XTERM;
2258     if (PL_lex_op) {
2259         pl_yylval.opval = PL_lex_op;
2260         PL_lex_op = NULL;
2261         return PMFUNC;
2262     }
2263     else
2264         return FUNC;
2265 }
2266
2267 /*
2268  * S_sublex_push
2269  * Create a new scope to save the lexing state.  The scope will be
2270  * ended in S_sublex_done.  Returns a '(', starting the function arguments
2271  * to the uc, lc, etc. found before.
2272  * Sets PL_lex_state to LEX_INTERPCONCAT.
2273  */
2274
2275 STATIC I32
2276 S_sublex_push(pTHX)
2277 {
2278     dVAR;
2279     ENTER;
2280
2281     PL_lex_state = PL_sublex_info.super_state;
2282     SAVEBOOL(PL_lex_dojoin);
2283     SAVEI32(PL_lex_brackets);
2284     SAVEI32(PL_lex_casemods);
2285     SAVEI32(PL_lex_starts);
2286     SAVEI8(PL_lex_state);
2287     SAVEVPTR(PL_lex_inpat);
2288     SAVEI16(PL_lex_inwhat);
2289     SAVECOPLINE(PL_curcop);
2290     SAVEPPTR(PL_bufptr);
2291     SAVEPPTR(PL_bufend);
2292     SAVEPPTR(PL_oldbufptr);
2293     SAVEPPTR(PL_oldoldbufptr);
2294     SAVEPPTR(PL_last_lop);
2295     SAVEPPTR(PL_last_uni);
2296     SAVEPPTR(PL_linestart);
2297     SAVESPTR(PL_linestr);
2298     SAVEGENERICPV(PL_lex_brackstack);
2299     SAVEGENERICPV(PL_lex_casestack);
2300
2301     PL_linestr = PL_lex_stuff;
2302     PL_lex_stuff = NULL;
2303
2304     PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2305         = SvPVX(PL_linestr);
2306     PL_bufend += SvCUR(PL_linestr);
2307     PL_last_lop = PL_last_uni = NULL;
2308     SAVEFREESV(PL_linestr);
2309
2310     PL_lex_dojoin = FALSE;
2311     PL_lex_brackets = 0;
2312     Newx(PL_lex_brackstack, 120, char);
2313     Newx(PL_lex_casestack, 12, char);
2314     PL_lex_casemods = 0;
2315     *PL_lex_casestack = '\0';
2316     PL_lex_starts = 0;
2317     PL_lex_state = LEX_INTERPCONCAT;
2318     CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2319
2320     PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2321     if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2322         PL_lex_inpat = PL_sublex_info.sub_op;
2323     else
2324         PL_lex_inpat = NULL;
2325
2326     return '(';
2327 }
2328
2329 /*
2330  * S_sublex_done
2331  * Restores lexer state after a S_sublex_push.
2332  */
2333
2334 STATIC I32
2335 S_sublex_done(pTHX)
2336 {
2337     dVAR;
2338     if (!PL_lex_starts++) {
2339         SV * const sv = newSVpvs("");
2340         if (SvUTF8(PL_linestr))
2341             SvUTF8_on(sv);
2342         PL_expect = XOPERATOR;
2343         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2344         return THING;
2345     }
2346
2347     if (PL_lex_casemods) {              /* oops, we've got some unbalanced parens */
2348         PL_lex_state = LEX_INTERPCASEMOD;
2349         return yylex();
2350     }
2351
2352     /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2353     if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2354         PL_linestr = PL_lex_repl;
2355         PL_lex_inpat = 0;
2356         PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2357         PL_bufend += SvCUR(PL_linestr);
2358         PL_last_lop = PL_last_uni = NULL;
2359         SAVEFREESV(PL_linestr);
2360         PL_lex_dojoin = FALSE;
2361         PL_lex_brackets = 0;
2362         PL_lex_casemods = 0;
2363         *PL_lex_casestack = '\0';
2364         PL_lex_starts = 0;
2365         if (SvEVALED(PL_lex_repl)) {
2366             PL_lex_state = LEX_INTERPNORMAL;
2367             PL_lex_starts++;
2368             /*  we don't clear PL_lex_repl here, so that we can check later
2369                 whether this is an evalled subst; that means we rely on the
2370                 logic to ensure sublex_done() is called again only via the
2371                 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2372         }
2373         else {
2374             PL_lex_state = LEX_INTERPCONCAT;
2375             PL_lex_repl = NULL;
2376         }
2377         return ',';
2378     }
2379     else {
2380 #ifdef PERL_MAD
2381         if (PL_madskills) {
2382             if (PL_thiswhite) {
2383                 if (!PL_endwhite)
2384                     PL_endwhite = newSVpvs("");
2385                 sv_catsv(PL_endwhite, PL_thiswhite);
2386                 PL_thiswhite = 0;
2387             }
2388             if (PL_thistoken)
2389                 sv_setpvs(PL_thistoken,"");
2390             else
2391                 PL_realtokenstart = -1;
2392         }
2393 #endif
2394         LEAVE;
2395         PL_bufend = SvPVX(PL_linestr);
2396         PL_bufend += SvCUR(PL_linestr);
2397         PL_expect = XOPERATOR;
2398         PL_sublex_info.sub_inwhat = 0;
2399         return ')';
2400     }
2401 }
2402
2403 /*
2404   scan_const
2405
2406   Extracts a pattern, double-quoted string, or transliteration.  This
2407   is terrifying code.
2408
2409   It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2410   processing a pattern (PL_lex_inpat is true), a transliteration
2411   (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2412
2413   Returns a pointer to the character scanned up to. If this is
2414   advanced from the start pointer supplied (i.e. if anything was
2415   successfully parsed), will leave an OP for the substring scanned
2416   in pl_yylval. Caller must intuit reason for not parsing further
2417   by looking at the next characters herself.
2418
2419   In patterns:
2420     backslashes:
2421       double-quoted style: \r and \n
2422       regexp special ones: \D \s
2423       constants: \x31
2424       backrefs: \1
2425       case and quoting: \U \Q \E
2426     stops on @ and $, but not for $ as tail anchor
2427
2428   In transliterations:
2429     characters are VERY literal, except for - not at the start or end
2430     of the string, which indicates a range. If the range is in bytes,
2431     scan_const expands the range to the full set of intermediate
2432     characters. If the range is in utf8, the hyphen is replaced with
2433     a certain range mark which will be handled by pmtrans() in op.c.
2434
2435   In double-quoted strings:
2436     backslashes:
2437       double-quoted style: \r and \n
2438       constants: \x31
2439       deprecated backrefs: \1 (in substitution replacements)
2440       case and quoting: \U \Q \E
2441     stops on @ and $
2442
2443   scan_const does *not* construct ops to handle interpolated strings.
2444   It stops processing as soon as it finds an embedded $ or @ variable
2445   and leaves it to the caller to work out what's going on.
2446
2447   embedded arrays (whether in pattern or not) could be:
2448       @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2449
2450   $ in double-quoted strings must be the symbol of an embedded scalar.
2451
2452   $ in pattern could be $foo or could be tail anchor.  Assumption:
2453   it's a tail anchor if $ is the last thing in the string, or if it's
2454   followed by one of "()| \r\n\t"
2455
2456   \1 (backreferences) are turned into $1
2457
2458   The structure of the code is
2459       while (there's a character to process) {
2460           handle transliteration ranges
2461           skip regexp comments /(?#comment)/ and codes /(?{code})/
2462           skip #-initiated comments in //x patterns
2463           check for embedded arrays
2464           check for embedded scalars
2465           if (backslash) {
2466               leave intact backslashes from leaveit (below)
2467               deprecate \1 in substitution replacements
2468               handle string-changing backslashes \l \U \Q \E, etc.
2469               switch (what was escaped) {
2470                   handle \- in a transliteration (becomes a literal -)
2471                   handle \132 (octal characters)
2472                   handle \x15 and \x{1234} (hex characters)
2473                   handle \N{name} (named characters)
2474                   handle \cV (control characters)
2475                   handle printf-style backslashes (\f, \r, \n, etc)
2476               } (end switch)
2477               continue
2478           } (end if backslash)
2479           handle regular character
2480     } (end while character to read)
2481
2482 */
2483
2484 STATIC char *
2485 S_scan_const(pTHX_ char *start)
2486 {
2487     dVAR;
2488     register char *send = PL_bufend;            /* end of the constant */
2489     SV *sv = newSV(send - start);               /* sv for the constant.  See
2490                                                    note below on sizing. */
2491     register char *s = start;                   /* start of the constant */
2492     register char *d = SvPVX(sv);               /* destination for copies */
2493     bool dorange = FALSE;                       /* are we in a translit range? */
2494     bool didrange = FALSE;                      /* did we just finish a range? */
2495     I32  has_utf8 = FALSE;                      /* Output constant is UTF8 */
2496     I32  this_utf8 = UTF;                       /* Is the source string assumed
2497                                                    to be UTF8?  But, this can
2498                                                    show as true when the source
2499                                                    isn't utf8, as for example
2500                                                    when it is entirely composed
2501                                                    of hex constants */
2502
2503     /* Note on sizing:  The scanned constant is placed into sv, which is
2504      * initialized by newSV() assuming one byte of output for every byte of
2505      * input.  This routine expects newSV() to allocate an extra byte for a
2506      * trailing NUL, which this routine will append if it gets to the end of
2507      * the input.  There may be more bytes of input than output (eg., \N{LATIN
2508      * CAPITAL LETTER A}), or more output than input if the constant ends up
2509      * recoded to utf8, but each time a construct is found that might increase
2510      * the needed size, SvGROW() is called.  Its size parameter each time is
2511      * based on the best guess estimate at the time, namely the length used so
2512      * far, plus the length the current construct will occupy, plus room for
2513      * the trailing NUL, plus one byte for every input byte still unscanned */
2514
2515     UV uv;
2516 #ifdef EBCDIC
2517     UV literal_endpoint = 0;
2518     bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
2519 #endif
2520
2521     PERL_ARGS_ASSERT_SCAN_CONST;
2522
2523     if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2524         /* If we are doing a trans and we know we want UTF8 set expectation */
2525         has_utf8   = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2526         this_utf8  = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2527     }
2528
2529
2530     while (s < send || dorange) {
2531         /* get transliterations out of the way (they're most literal) */
2532         if (PL_lex_inwhat == OP_TRANS) {
2533             /* expand a range A-Z to the full set of characters.  AIE! */
2534             if (dorange) {
2535                 I32 i;                          /* current expanded character */
2536                 I32 min;                        /* first character in range */
2537                 I32 max;                        /* last character in range */
2538
2539 #ifdef EBCDIC
2540                 UV uvmax = 0;
2541 #endif
2542
2543                 if (has_utf8
2544 #ifdef EBCDIC
2545                     && !native_range
2546 #endif
2547                     ) {
2548                     char * const c = (char*)utf8_hop((U8*)d, -1);
2549                     char *e = d++;
2550                     while (e-- > c)
2551                         *(e + 1) = *e;
2552                     *c = (char)UTF_TO_NATIVE(0xff);
2553                     /* mark the range as done, and continue */
2554                     dorange = FALSE;
2555                     didrange = TRUE;
2556                     continue;
2557                 }
2558
2559                 i = d - SvPVX_const(sv);                /* remember current offset */
2560 #ifdef EBCDIC
2561                 SvGROW(sv,
2562                        SvLEN(sv) + (has_utf8 ?
2563                                     (512 - UTF_CONTINUATION_MARK +
2564                                      UNISKIP(0x100))
2565                                     : 256));
2566                 /* How many two-byte within 0..255: 128 in UTF-8,
2567                  * 96 in UTF-8-mod. */
2568 #else
2569                 SvGROW(sv, SvLEN(sv) + 256);    /* never more than 256 chars in a range */
2570 #endif
2571                 d = SvPVX(sv) + i;              /* refresh d after realloc */
2572 #ifdef EBCDIC
2573                 if (has_utf8) {
2574                     int j;
2575                     for (j = 0; j <= 1; j++) {
2576                         char * const c = (char*)utf8_hop((U8*)d, -1);
2577                         const UV uv    = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2578                         if (j)
2579                             min = (U8)uv;
2580                         else if (uv < 256)
2581                             max = (U8)uv;
2582                         else {
2583                             max = (U8)0xff; /* only to \xff */
2584                             uvmax = uv; /* \x{100} to uvmax */
2585                         }
2586                         d = c; /* eat endpoint chars */
2587                      }
2588                 }
2589                else {
2590 #endif
2591                    d -= 2;              /* eat the first char and the - */
2592                    min = (U8)*d;        /* first char in range */
2593                    max = (U8)d[1];      /* last char in range  */
2594 #ifdef EBCDIC
2595                }
2596 #endif
2597
2598                 if (min > max) {
2599                     Perl_croak(aTHX_
2600                                "Invalid range \"%c-%c\" in transliteration operator",
2601                                (char)min, (char)max);
2602                 }
2603
2604 #ifdef EBCDIC
2605                 if (literal_endpoint == 2 &&
2606                     ((isLOWER(min) && isLOWER(max)) ||
2607                      (isUPPER(min) && isUPPER(max)))) {
2608                     if (isLOWER(min)) {
2609                         for (i = min; i <= max; i++)
2610                             if (isLOWER(i))
2611                                 *d++ = NATIVE_TO_NEED(has_utf8,i);
2612                     } else {
2613                         for (i = min; i <= max; i++)
2614                             if (isUPPER(i))
2615                                 *d++ = NATIVE_TO_NEED(has_utf8,i);
2616                     }
2617                 }
2618                 else
2619 #endif
2620                     for (i = min; i <= max; i++)
2621 #ifdef EBCDIC
2622                         if (has_utf8) {
2623                             const U8 ch = (U8)NATIVE_TO_UTF(i);
2624                             if (UNI_IS_INVARIANT(ch))
2625                                 *d++ = (U8)i;
2626                             else {
2627                                 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2628                                 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2629                             }
2630                         }
2631                         else
2632 #endif
2633                             *d++ = (char)i;
2634
2635 #ifdef EBCDIC
2636                 if (uvmax) {
2637                     d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2638                     if (uvmax > 0x101)
2639                         *d++ = (char)UTF_TO_NATIVE(0xff);
2640                     if (uvmax > 0x100)
2641                         d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2642                 }
2643 #endif
2644
2645                 /* mark the range as done, and continue */
2646                 dorange = FALSE;
2647                 didrange = TRUE;
2648 #ifdef EBCDIC
2649                 literal_endpoint = 0;
2650 #endif
2651                 continue;
2652             }
2653
2654             /* range begins (ignore - as first or last char) */
2655             else if (*s == '-' && s+1 < send  && s != start) {
2656                 if (didrange) {
2657                     Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
2658                 }
2659                 if (has_utf8
2660 #ifdef EBCDIC
2661                     && !native_range
2662 #endif
2663                     ) {
2664                     *d++ = (char)UTF_TO_NATIVE(0xff);   /* use illegal utf8 byte--see pmtrans */
2665                     s++;
2666                     continue;
2667                 }
2668                 dorange = TRUE;
2669                 s++;
2670             }
2671             else {
2672                 didrange = FALSE;
2673 #ifdef EBCDIC
2674                 literal_endpoint = 0;
2675                 native_range = TRUE;
2676 #endif
2677             }
2678         }
2679
2680         /* if we get here, we're not doing a transliteration */
2681
2682         /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2683            except for the last char, which will be done separately. */
2684         else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
2685             if (s[2] == '#') {
2686                 while (s+1 < send && *s != ')')
2687                     *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2688             }
2689             else if (s[2] == '{' /* This should match regcomp.c */
2690                     || (s[2] == '?' && s[3] == '{'))
2691             {
2692                 I32 count = 1;
2693                 char *regparse = s + (s[2] == '{' ? 3 : 4);
2694                 char c;
2695
2696                 while (count && (c = *regparse)) {
2697                     if (c == '\\' && regparse[1])
2698                         regparse++;
2699                     else if (c == '{')
2700                         count++;
2701                     else if (c == '}')
2702                         count--;
2703                     regparse++;
2704                 }
2705                 if (*regparse != ')')
2706                     regparse--;         /* Leave one char for continuation. */
2707                 while (s < regparse)
2708                     *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2709             }
2710         }
2711
2712         /* likewise skip #-initiated comments in //x patterns */
2713         else if (*s == '#' && PL_lex_inpat &&
2714           ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
2715             while (s+1 < send && *s != '\n')
2716                 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2717         }
2718
2719         /* check for embedded arrays
2720            (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
2721            */
2722         else if (*s == '@' && s[1]) {
2723             if (isALNUM_lazy_if(s+1,UTF))
2724                 break;
2725             if (strchr(":'{$", s[1]))
2726                 break;
2727             if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2728                 break; /* in regexp, neither @+ nor @- are interpolated */
2729         }
2730
2731         /* check for embedded scalars.  only stop if we're sure it's a
2732            variable.
2733         */
2734         else if (*s == '$') {
2735             if (!PL_lex_inpat)  /* not a regexp, so $ must be var */
2736                 break;
2737             if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
2738                 if (s[1] == '\\') {
2739                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2740                                    "Possible unintended interpolation of $\\ in regex");
2741                 }
2742                 break;          /* in regexp, $ might be tail anchor */
2743             }
2744         }
2745
2746         /* End of else if chain - OP_TRANS rejoin rest */
2747
2748         /* backslashes */
2749         if (*s == '\\' && s+1 < send) {
2750             s++;
2751
2752             /* deprecate \1 in strings and substitution replacements */
2753             if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
2754                 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
2755             {
2756                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
2757                 *--s = '$';
2758                 break;
2759             }
2760
2761             /* string-change backslash escapes */
2762             if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
2763                 --s;
2764                 break;
2765             }
2766             /* skip any other backslash escapes in a pattern */
2767             else if (PL_lex_inpat) {
2768                 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2769                 goto default_action;
2770             }
2771
2772             /* if we get here, it's either a quoted -, or a digit */
2773             switch (*s) {
2774
2775             /* quoted - in transliterations */
2776             case '-':
2777                 if (PL_lex_inwhat == OP_TRANS) {
2778                     *d++ = *s++;
2779                     continue;
2780                 }
2781                 /* FALL THROUGH */
2782             default:
2783                 {
2784                     if ((isALPHA(*s) || isDIGIT(*s)))
2785                         Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
2786                                        "Unrecognized escape \\%c passed through",
2787                                        *s);
2788                     /* default action is to copy the quoted character */
2789                     goto default_action;
2790                 }
2791
2792             /* eg. \132 indicates the octal constant 0x132 */
2793             case '0': case '1': case '2': case '3':
2794             case '4': case '5': case '6': case '7':
2795                 {
2796                     I32 flags = 0;
2797                     STRLEN len = 3;
2798                     uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
2799                     s += len;
2800                 }
2801                 goto NUM_ESCAPE_INSERT;
2802
2803             /* eg. \x24 indicates the hex constant 0x24 */
2804             case 'x':
2805                 ++s;
2806                 if (*s == '{') {
2807                     char* const e = strchr(s, '}');
2808                     I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2809                       PERL_SCAN_DISALLOW_PREFIX;
2810                     STRLEN len;
2811
2812                     ++s;
2813                     if (!e) {
2814                         yyerror("Missing right brace on \\x{}");
2815                         continue;
2816                     }
2817                     len = e - s;
2818                     uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
2819                     s = e + 1;
2820                 }
2821                 else {
2822                     {
2823                         STRLEN len = 2;
2824                         I32 flags = PERL_SCAN_DISALLOW_PREFIX;
2825                         uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
2826                         s += len;
2827                     }
2828                 }
2829
2830               NUM_ESCAPE_INSERT:
2831                 /* Insert oct, hex, or \N{U+...} escaped character.  There will
2832                  * always be enough room in sv since such escapes will be
2833                  * longer than any UTF-8 sequence they can end up as, except if
2834                  * they force us to recode the rest of the string into utf8 */
2835
2836                 /* Here uv is the ordinal of the next character being added in
2837                  * unicode (converted from native).  (It has to be done before
2838                  * here because \N is interpreted as unicode, and oct and hex
2839                  * as native.) */
2840                 if (!UNI_IS_INVARIANT(uv)) {
2841                     if (!has_utf8 && uv > 255) {
2842                         /* Might need to recode whatever we have accumulated so
2843                          * far if it contains any chars variant in utf8 or
2844                          * utf-ebcdic. */
2845
2846                         SvCUR_set(sv, d - SvPVX_const(sv));
2847                         SvPOK_on(sv);
2848                         *d = '\0';
2849                         /* See Note on sizing above.  */
2850                         sv_utf8_upgrade_flags_grow(sv,
2851                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
2852                                         UNISKIP(uv) + (STRLEN)(send - s) + 1);
2853                         d = SvPVX(sv) + SvCUR(sv);
2854                         has_utf8 = TRUE;
2855                     }
2856
2857                     if (has_utf8) {
2858                         d = (char*)uvuni_to_utf8((U8*)d, uv);
2859                         if (PL_lex_inwhat == OP_TRANS &&
2860                             PL_sublex_info.sub_op) {
2861                             PL_sublex_info.sub_op->op_private |=
2862                                 (PL_lex_repl ? OPpTRANS_FROM_UTF
2863                                              : OPpTRANS_TO_UTF);
2864                         }
2865 #ifdef EBCDIC
2866                         if (uv > 255 && !dorange)
2867                             native_range = FALSE;
2868 #endif
2869                     }
2870                     else {
2871                         *d++ = (char)uv;
2872                     }
2873                 }
2874                 else {
2875                     *d++ = (char) uv;
2876                 }
2877                 continue;
2878
2879             /* \N{LATIN SMALL LETTER A} is a named character, and so is
2880              * \N{U+0041} */
2881             case 'N':
2882                 ++s;
2883                 if (*s == '{') {
2884                     char* e = strchr(s, '}');
2885                     SV *res;
2886                     STRLEN len;
2887                     const char *str;
2888
2889                     if (!e) {
2890                         yyerror("Missing right brace on \\N{}");
2891                         e = s - 1;
2892                         goto cont_scan;
2893                     }
2894                     if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
2895                         /* \N{U+...} The ... is a unicode value even on EBCDIC
2896                          * machines */
2897                         I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2898                           PERL_SCAN_DISALLOW_PREFIX;
2899                         s += 3;
2900                         len = e - s;
2901                         uv = grok_hex(s, &len, &flags, NULL);
2902                         if ( e > s && len != (STRLEN)(e - s) ) {
2903                             uv = 0xFFFD;
2904                         }
2905                         s = e + 1;
2906                         goto NUM_ESCAPE_INSERT;
2907                     }
2908                     res = newSVpvn(s + 1, e - s - 1);
2909                     res = new_constant( NULL, 0, "charnames",
2910                                         res, NULL, s - 2, e - s + 3 );
2911                     if (has_utf8)
2912                         sv_utf8_upgrade(res);
2913                     str = SvPV_const(res,len);
2914 #ifdef EBCDIC_NEVER_MIND
2915                     /* charnames uses pack U and that has been
2916                      * recently changed to do the below uni->native
2917                      * mapping, so this would be redundant (and wrong,
2918                      * the code point would be doubly converted).
2919                      * But leave this in just in case the pack U change
2920                      * gets revoked, but the semantics is still
2921                      * desireable for charnames. --jhi */
2922                     {
2923                          UV uv = utf8_to_uvchr((const U8*)str, 0);
2924
2925                          if (uv < 0x100) {
2926                               U8 tmpbuf[UTF8_MAXBYTES+1], *d;
2927
2928                               d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
2929                               sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
2930                               str = SvPV_const(res, len);
2931                          }
2932                     }
2933 #endif
2934                     /* If destination is not in utf8 but this new character is,
2935                      * recode the dest to utf8 */
2936                     if (!has_utf8 && SvUTF8(res)) {
2937                         SvCUR_set(sv, d - SvPVX_const(sv));
2938                         SvPOK_on(sv);
2939                         *d = '\0';
2940                         /* See Note on sizing above.  */
2941                         sv_utf8_upgrade_flags_grow(sv,
2942                                             SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
2943                                             len + (STRLEN)(send - s) + 1);
2944                         d = SvPVX(sv) + SvCUR(sv);
2945                         has_utf8 = TRUE;
2946                     } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
2947
2948                         /* See Note on sizing above.  (NOTE: SvCUR() is not set
2949                          * correctly here). */
2950                         const STRLEN off = d - SvPVX_const(sv);
2951                         d = SvGROW(sv, off + len + (STRLEN)(send - s) + 1) + off;
2952                     }
2953 #ifdef EBCDIC
2954                     if (!dorange)
2955                         native_range = FALSE; /* \N{} is guessed to be Unicode */
2956 #endif
2957                     Copy(str, d, len, char);
2958                     d += len;
2959                     SvREFCNT_dec(res);
2960                   cont_scan:
2961                     s = e + 1;
2962                 }
2963                 else
2964                     yyerror("Missing braces on \\N{}");
2965                 continue;
2966
2967             /* \c is a control character */
2968             case 'c':
2969                 s++;
2970                 if (s < send) {
2971                     U8 c = *s++;
2972 #ifdef EBCDIC
2973                     if (isLOWER(c))
2974                         c = toUPPER(c);
2975 #endif
2976                     *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
2977                 }
2978                 else {
2979                     yyerror("Missing control char name in \\c");
2980                 }
2981                 continue;
2982
2983             /* printf-style backslashes, formfeeds, newlines, etc */
2984             case 'b':
2985                 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
2986                 break;
2987             case 'n':
2988                 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
2989                 break;
2990             case 'r':
2991                 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
2992                 break;
2993             case 'f':
2994                 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
2995                 break;
2996             case 't':
2997                 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
2998                 break;
2999             case 'e':
3000                 *d++ = ASCII_TO_NEED(has_utf8,'\033');
3001                 break;
3002             case 'a':
3003                 *d++ = ASCII_TO_NEED(has_utf8,'\007');
3004                 break;
3005             } /* end switch */
3006
3007             s++;
3008             continue;
3009         } /* end if (backslash) */
3010 #ifdef EBCDIC
3011         else
3012             literal_endpoint++;
3013 #endif
3014
3015     default_action:
3016         /* If we started with encoded form, or already know we want it,
3017            then encode the next character */
3018         if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
3019             STRLEN len  = 1;
3020
3021
3022             /* One might think that it is wasted effort in the case of the
3023              * source being utf8 (this_utf8 == TRUE) to take the next character
3024              * in the source, convert it to an unsigned value, and then convert
3025              * it back again.  But the source has not been validated here.  The
3026              * routine that does the conversion checks for errors like
3027              * malformed utf8 */
3028
3029             const UV nextuv   = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3030             const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
3031             if (!has_utf8) {
3032                 SvCUR_set(sv, d - SvPVX_const(sv));
3033                 SvPOK_on(sv);
3034                 *d = '\0';
3035                 /* See Note on sizing above.  */
3036                 sv_utf8_upgrade_flags_grow(sv,
3037                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3038                                         need + (STRLEN)(send - s) + 1);
3039                 d = SvPVX(sv) + SvCUR(sv);
3040                 has_utf8 = TRUE;
3041             } else if (need > len) {
3042                 /* encoded value larger than old, may need extra space (NOTE:
3043                  * SvCUR() is not set correctly here).   See Note on sizing
3044                  * above.  */
3045                 const STRLEN off = d - SvPVX_const(sv);
3046                 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
3047             }
3048             s += len;
3049
3050             d = (char*)uvchr_to_utf8((U8*)d, nextuv);
3051 #ifdef EBCDIC
3052             if (uv > 255 && !dorange)
3053                 native_range = FALSE;
3054 #endif
3055         }
3056         else {
3057             *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3058         }
3059     } /* while loop to process each character */
3060
3061     /* terminate the string and set up the sv */
3062     *d = '\0';
3063     SvCUR_set(sv, d - SvPVX_const(sv));
3064     if (SvCUR(sv) >= SvLEN(sv))
3065         Perl_croak(aTHX_ "panic: constant overflowed allocated space");
3066
3067     SvPOK_on(sv);
3068     if (PL_encoding && !has_utf8) {
3069         sv_recode_to_utf8(sv, PL_encoding);
3070         if (SvUTF8(sv))
3071             has_utf8 = TRUE;
3072     }
3073     if (has_utf8) {
3074         SvUTF8_on(sv);
3075         if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3076             PL_sublex_info.sub_op->op_private |=
3077                     (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3078         }
3079     }
3080
3081     /* shrink the sv if we allocated more than we used */
3082     if (SvCUR(sv) + 5 < SvLEN(sv)) {
3083         SvPV_shrink_to_cur(sv);
3084     }
3085
3086     /* return the substring (via pl_yylval) only if we parsed anything */
3087     if (s > PL_bufptr) {
3088         if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
3089             const char *const key = PL_lex_inpat ? "qr" : "q";
3090             const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3091             const char *type;
3092             STRLEN typelen;
3093
3094             if (PL_lex_inwhat == OP_TRANS) {
3095                 type = "tr";
3096                 typelen = 2;
3097             } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3098                 type = "s";
3099                 typelen = 1;
3100             } else  {
3101                 type = "qq";
3102                 typelen = 2;
3103             }
3104
3105             sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3106                                 type, typelen);
3107         }
3108         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3109     } else
3110         SvREFCNT_dec(sv);
3111     return s;
3112 }
3113
3114 /* S_intuit_more
3115  * Returns TRUE if there's more to the expression (e.g., a subscript),
3116  * FALSE otherwise.
3117  *
3118  * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3119  *
3120  * ->[ and ->{ return TRUE
3121  * { and [ outside a pattern are always subscripts, so return TRUE
3122  * if we're outside a pattern and it's not { or [, then return FALSE
3123  * if we're in a pattern and the first char is a {
3124  *   {4,5} (any digits around the comma) returns FALSE
3125  * if we're in a pattern and the first char is a [
3126  *   [] returns FALSE
3127  *   [SOMETHING] has a funky algorithm to decide whether it's a
3128  *      character class or not.  It has to deal with things like
3129  *      /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3130  * anything else returns TRUE
3131  */
3132
3133 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
3134
3135 STATIC int
3136 S_intuit_more(pTHX_ register char *s)
3137 {
3138     dVAR;
3139
3140     PERL_ARGS_ASSERT_INTUIT_MORE;
3141
3142     if (PL_lex_brackets)
3143         return TRUE;
3144     if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3145         return TRUE;
3146     if (*s != '{' && *s != '[')
3147         return FALSE;
3148     if (!PL_lex_inpat)
3149         return TRUE;
3150
3151     /* In a pattern, so maybe we have {n,m}. */
3152     if (*s == '{') {
3153         s++;
3154         if (!isDIGIT(*s))
3155             return TRUE;
3156         while (isDIGIT(*s))
3157             s++;
3158         if (*s == ',')
3159             s++;
3160         while (isDIGIT(*s))
3161             s++;
3162         if (*s == '}')
3163             return FALSE;
3164         return TRUE;
3165
3166     }
3167
3168     /* On the other hand, maybe we have a character class */
3169
3170     s++;
3171     if (*s == ']' || *s == '^')
3172         return FALSE;
3173     else {
3174         /* this is terrifying, and it works */
3175         int weight = 2;         /* let's weigh the evidence */
3176         char seen[256];
3177         unsigned char un_char = 255, last_un_char;
3178         const char * const send = strchr(s,']');
3179         char tmpbuf[sizeof PL_tokenbuf * 4];
3180
3181         if (!send)              /* has to be an expression */
3182             return TRUE;
3183
3184         Zero(seen,256,char);
3185         if (*s == '$')
3186             weight -= 3;
3187         else if (isDIGIT(*s)) {
3188             if (s[1] != ']') {
3189                 if (isDIGIT(s[1]) && s[2] == ']')
3190                     weight -= 10;
3191             }
3192             else
3193                 weight -= 100;
3194         }
3195         for (; s < send; s++) {
3196             last_un_char = un_char;
3197             un_char = (unsigned char)*s;
3198             switch (*s) {
3199             case '@':
3200             case '&':
3201             case '$':
3202                 weight -= seen[un_char] * 10;
3203                 if (isALNUM_lazy_if(s+1,UTF)) {
3204                     int len;
3205                     scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
3206                     len = (int)strlen(tmpbuf);
3207                     if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
3208                         weight -= 100;
3209                     else
3210                         weight -= 10;
3211                 }
3212                 else if (*s == '$' && s[1] &&
3213                   strchr("[#!%*<>()-=",s[1])) {
3214                     if (/*{*/ strchr("])} =",s[2]))
3215                         weight -= 10;
3216                     else
3217                         weight -= 1;
3218                 }
3219                 break;
3220             case '\\':
3221                 un_char = 254;
3222                 if (s[1]) {
3223                     if (strchr("wds]",s[1]))
3224                         weight += 100;
3225                     else if (seen[(U8)'\''] || seen[(U8)'"'])
3226                         weight += 1;
3227                     else if (strchr("rnftbxcav",s[1]))
3228                         weight += 40;
3229                     else if (isDIGIT(s[1])) {
3230                         weight += 40;
3231                         while (s[1] && isDIGIT(s[1]))
3232                             s++;
3233                     }
3234                 }
3235                 else
3236                     weight += 100;
3237                 break;
3238             case '-':
3239                 if (s[1] == '\\')
3240                     weight += 50;
3241                 if (strchr("aA01! ",last_un_char))
3242                     weight += 30;
3243                 if (strchr("zZ79~",s[1]))
3244                     weight += 30;
3245                 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3246                     weight -= 5;        /* cope with negative subscript */
3247                 break;
3248             default:
3249                 if (!isALNUM(last_un_char)
3250                     && !(last_un_char == '$' || last_un_char == '@'
3251                          || last_un_char == '&')
3252                     && isALPHA(*s) && s[1] && isALPHA(s[1])) {
3253                     char *d = tmpbuf;
3254                     while (isALPHA(*s))
3255                         *d++ = *s++;
3256                     *d = '\0';
3257                     if (keyword(tmpbuf, d - tmpbuf, 0))
3258                         weight -= 150;
3259                 }
3260                 if (un_char == last_un_char + 1)
3261                     weight += 5;
3262                 weight -= seen[un_char];
3263                 break;
3264             }
3265             seen[un_char]++;
3266         }
3267         if (weight >= 0)        /* probably a character class */
3268             return FALSE;
3269     }
3270
3271     return TRUE;
3272 }
3273
3274 /*
3275  * S_intuit_method
3276  *
3277  * Does all the checking to disambiguate
3278  *   foo bar
3279  * between foo(bar) and bar->foo.  Returns 0 if not a method, otherwise
3280  * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3281  *
3282  * First argument is the stuff after the first token, e.g. "bar".
3283  *
3284  * Not a method if bar is a filehandle.
3285  * Not a method if foo is a subroutine prototyped to take a filehandle.
3286  * Not a method if it's really "Foo $bar"
3287  * Method if it's "foo $bar"
3288  * Not a method if it's really "print foo $bar"
3289  * Method if it's really "foo package::" (interpreted as package->foo)
3290  * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3291  * Not a method if bar is a filehandle or package, but is quoted with
3292  *   =>
3293  */
3294
3295 STATIC int
3296 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
3297 {
3298     dVAR;
3299     char *s = start + (*start == '$');
3300     char tmpbuf[sizeof PL_tokenbuf];
3301     STRLEN len;
3302     GV* indirgv;
3303 #ifdef PERL_MAD
3304     int soff;
3305 #endif
3306
3307     PERL_ARGS_ASSERT_INTUIT_METHOD;
3308
3309     if (gv) {
3310         if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
3311             return 0;
3312         if (cv) {
3313             if (SvPOK(cv)) {
3314                 const char *proto = SvPVX_const(cv);
3315                 if (proto) {
3316                     if (*proto == ';')
3317                         proto++;
3318                     if (*proto == '*')
3319                         return 0;
3320                 }
3321             }
3322         } else
3323             gv = NULL;
3324     }
3325     s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
3326     /* start is the beginning of the possible filehandle/object,
3327      * and s is the end of it
3328      * tmpbuf is a copy of it
3329      */
3330
3331     if (*start == '$') {
3332         if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3333                 isUPPER(*PL_tokenbuf))
3334             return 0;
3335 #ifdef PERL_MAD
3336         len = start - SvPVX(PL_linestr);
3337 #endif
3338         s = PEEKSPACE(s);
3339 #ifdef PERL_MAD
3340         start = SvPVX(PL_linestr) + len;
3341 #endif
3342         PL_bufptr = start;
3343         PL_expect = XREF;
3344         return *s == '(' ? FUNCMETH : METHOD;
3345     }
3346     if (!keyword(tmpbuf, len, 0)) {
3347         if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
3348             len -= 2;
3349             tmpbuf[len] = '\0';
3350 #ifdef PERL_MAD
3351             soff = s - SvPVX(PL_linestr);
3352 #endif
3353             goto bare_package;
3354         }
3355         indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
3356         if (indirgv && GvCVu(indirgv))
3357             return 0;
3358         /* filehandle or package name makes it a method */
3359         if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
3360 #ifdef PERL_MAD
3361             soff = s - SvPVX(PL_linestr);
3362 #endif
3363             s = PEEKSPACE(s);
3364             if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
3365                 return 0;       /* no assumptions -- "=>" quotes bearword */
3366       bare_package:
3367             start_force(PL_curforce);
3368             NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
3369                                                   S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
3370             NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
3371             if (PL_madskills)
3372                 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
3373             PL_expect = XTERM;
3374             force_next(WORD);
3375             PL_bufptr = s;
3376 #ifdef PERL_MAD
3377             PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
3378 #endif
3379             return *s == '(' ? FUNCMETH : METHOD;
3380         }
3381     }
3382     return 0;
3383 }
3384
3385 /* Encoded script support. filter_add() effectively inserts a
3386  * 'pre-processing' function into the current source input stream.
3387  * Note that the filter function only applies to the current source file
3388  * (e.g., it will not affect files 'require'd or 'use'd by this one).
3389  *
3390  * The datasv parameter (which may be NULL) can be used to pass
3391  * private data to this instance of the filter. The filter function
3392  * can recover the SV using the FILTER_DATA macro and use it to
3393  * store private buffers and state information.
3394  *
3395  * The supplied datasv parameter is upgraded to a PVIO type
3396  * and the IoDIRP/IoANY field is used to store the function pointer,
3397  * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
3398  * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
3399  * private use must be set using malloc'd pointers.
3400  */
3401
3402 SV *
3403 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
3404 {
3405     dVAR;
3406     if (!funcp)
3407         return NULL;
3408
3409     if (!PL_parser)
3410         return NULL;
3411
3412     if (!PL_rsfp_filters)
3413         PL_rsfp_filters = newAV();
3414     if (!datasv)
3415         datasv = newSV(0);
3416     SvUPGRADE(datasv, SVt_PVIO);
3417     IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
3418     IoFLAGS(datasv) |= IOf_FAKE_DIRP;
3419     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
3420                           FPTR2DPTR(void *, IoANY(datasv)),
3421                           SvPV_nolen(datasv)));
3422     av_unshift(PL_rsfp_filters, 1);
3423     av_store(PL_rsfp_filters, 0, datasv) ;
3424     return(datasv);
3425 }
3426
3427
3428 /* Delete most recently added instance of this filter function. */
3429 void
3430 Perl_filter_del(pTHX_ filter_t funcp)
3431 {
3432     dVAR;
3433     SV *datasv;
3434
3435     PERL_ARGS_ASSERT_FILTER_DEL;
3436
3437 #ifdef DEBUGGING
3438     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
3439                           FPTR2DPTR(void*, funcp)));
3440 #endif
3441     if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
3442         return;
3443     /* if filter is on top of stack (usual case) just pop it off */
3444     datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
3445     if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
3446         IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
3447         IoANY(datasv) = (void *)NULL;
3448         sv_free(av_pop(PL_rsfp_filters));
3449
3450         return;
3451     }
3452     /* we need to search for the correct entry and clear it     */
3453     Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
3454 }
3455
3456
3457 /* Invoke the idxth filter function for the current rsfp.        */
3458 /* maxlen 0 = read one text line */
3459 I32
3460 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
3461 {
3462     dVAR;
3463     filter_t funcp;
3464     SV *datasv = NULL;
3465     /* This API is bad. It should have been using unsigned int for maxlen.
3466        Not sure if we want to change the API, but if not we should sanity
3467        check the value here.  */
3468     const unsigned int correct_length
3469         = maxlen < 0 ?
3470 #ifdef PERL_MICRO
3471         0x7FFFFFFF
3472 #else
3473         INT_MAX
3474 #endif
3475         : maxlen;
3476
3477     PERL_ARGS_ASSERT_FILTER_READ;
3478
3479     if (!PL_parser || !PL_rsfp_filters)
3480         return -1;
3481     if (idx > AvFILLp(PL_rsfp_filters)) {       /* Any more filters?    */
3482         /* Provide a default input filter to make life easy.    */
3483         /* Note that we append to the line. This is handy.      */
3484         DEBUG_P(PerlIO_printf(Perl_debug_log,
3485                               "filter_read %d: from rsfp\n", idx));
3486         if (correct_length) {
3487             /* Want a block */
3488             int len ;
3489             const int old_len = SvCUR(buf_sv);
3490
3491             /* ensure buf_sv is large enough */
3492             SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
3493             if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
3494                                    correct_length)) <= 0) {
3495                 if (PerlIO_error(PL_rsfp))
3496                     return -1;          /* error */
3497                 else
3498                     return 0 ;          /* end of file */
3499             }
3500             SvCUR_set(buf_sv, old_len + len) ;
3501             SvPVX(buf_sv)[old_len + len] = '\0';
3502         } else {
3503             /* Want a line */
3504             if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
3505                 if (PerlIO_error(PL_rsfp))
3506                     return -1;          /* error */
3507                 else
3508                     return 0 ;          /* end of file */
3509             }
3510         }
3511         return SvCUR(buf_sv);
3512     }
3513     /* Skip this filter slot if filter has been deleted */
3514     if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
3515         DEBUG_P(PerlIO_printf(Perl_debug_log,
3516                               "filter_read %d: skipped (filter deleted)\n",
3517                               idx));
3518         return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
3519     }
3520     /* Get function pointer hidden within datasv        */
3521     funcp = DPTR2FPTR(filter_t, IoANY(datasv));
3522     DEBUG_P(PerlIO_printf(Perl_debug_log,
3523                           "filter_read %d: via function %p (%s)\n",
3524                           idx, (void*)datasv, SvPV_nolen_const(datasv)));
3525     /* Call function. The function is expected to       */
3526     /* call "FILTER_READ(idx+1, buf_sv)" first.         */
3527     /* Return: <0:error, =0:eof, >0:not eof             */
3528     return (*funcp)(aTHX_ idx, buf_sv, correct_length);
3529 }
3530
3531 STATIC char *
3532 S_filter_gets(pTHX_ register SV *sv, STRLEN append)
3533 {
3534     dVAR;
3535
3536     PERL_ARGS_ASSERT_FILTER_GETS;
3537
3538 #ifdef PERL_CR_FILTER
3539     if (!PL_rsfp_filters) {
3540         filter_add(S_cr_textfilter,NULL);
3541     }
3542 #endif
3543     if (PL_rsfp_filters) {
3544         if (!append)
3545             SvCUR_set(sv, 0);   /* start with empty line        */
3546         if (FILTER_READ(0, sv, 0) > 0)
3547             return ( SvPVX(sv) ) ;
3548         else
3549             return NULL ;
3550     }
3551     else
3552         return (sv_gets(sv, PL_rsfp, append));
3553 }
3554
3555 STATIC HV *
3556 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
3557 {
3558     dVAR;
3559     GV *gv;
3560
3561     PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
3562
3563     if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
3564         return PL_curstash;
3565
3566     if (len > 2 &&
3567         (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
3568         (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
3569     {
3570         return GvHV(gv);                        /* Foo:: */
3571     }
3572
3573     /* use constant CLASS => 'MyClass' */
3574     gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
3575     if (gv && GvCV(gv)) {
3576         SV * const sv = cv_const_sv(GvCV(gv));
3577         if (sv)
3578             pkgname = SvPV_const(sv, len);
3579     }
3580
3581     return gv_stashpvn(pkgname, len, 0);
3582 }
3583
3584 /*
3585  * S_readpipe_override
3586  * Check whether readpipe() is overriden, and generates the appropriate
3587  * optree, provided sublex_start() is called afterwards.
3588  */
3589 STATIC void
3590 S_readpipe_override(pTHX)
3591 {
3592     GV **gvp;
3593     GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
3594     pl_yylval.ival = OP_BACKTICK;
3595     if ((gv_readpipe
3596                 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
3597             ||
3598             ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
3599              && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
3600              && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
3601     {
3602         PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
3603             append_elem(OP_LIST,
3604                 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
3605                 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
3606     }
3607 }
3608
3609 #ifdef PERL_MAD
3610  /*
3611  * Perl_madlex
3612  * The intent of this yylex wrapper is to minimize the changes to the
3613  * tokener when we aren't interested in collecting madprops.  It remains
3614  * to be seen how successful this strategy will be...
3615  */
3616
3617 int
3618 Perl_madlex(pTHX)
3619 {
3620     int optype;
3621     char *s = PL_bufptr;
3622
3623     /* make sure PL_thiswhite is initialized */
3624     PL_thiswhite = 0;
3625     PL_thismad = 0;
3626
3627     /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
3628     if (PL_pending_ident)
3629         return S_pending_ident(aTHX);
3630
3631     /* previous token ate up our whitespace? */
3632     if (!PL_lasttoke && PL_nextwhite) {
3633         PL_thiswhite = PL_nextwhite;
3634         PL_nextwhite = 0;
3635     }
3636
3637     /* isolate the token, and figure out where it is without whitespace */
3638     PL_realtokenstart = -1;
3639     PL_thistoken = 0;
3640     optype = yylex();
3641     s = PL_bufptr;
3642     assert(PL_curforce < 0);
3643
3644     if (!PL_thismad || PL_thismad->mad_key == '^') {    /* not forced already? */
3645         if (!PL_thistoken) {
3646             if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
3647                 PL_thistoken = newSVpvs("");
3648             else {
3649                 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
3650                 PL_thistoken = newSVpvn(tstart, s - tstart);
3651             }
3652         }
3653         if (PL_thismad) /* install head */
3654             CURMAD('X', PL_thistoken);
3655     }
3656
3657     /* last whitespace of a sublex? */
3658     if (optype == ')' && PL_endwhite) {
3659         CURMAD('X', PL_endwhite);
3660     }
3661
3662     if (!PL_thismad) {
3663
3664         /* if no whitespace and we're at EOF, bail.  Otherwise fake EOF below. */
3665         if (!PL_thiswhite && !PL_endwhite && !optype) {
3666             sv_free(PL_thistoken);
3667             PL_thistoken = 0;
3668             return 0;
3669         }
3670
3671         /* put off final whitespace till peg */
3672         if (optype == ';' && !PL_rsfp) {
3673             PL_nextwhite = PL_thiswhite;
3674             PL_thiswhite = 0;
3675         }
3676         else if (PL_thisopen) {
3677             CURMAD('q', PL_thisopen);
3678             if (PL_thistoken)
3679                 sv_free(PL_thistoken);
3680             PL_thistoken = 0;
3681         }
3682         else {
3683             /* Store actual token text as madprop X */
3684             CURMAD('X', PL_thistoken);
3685         }
3686
3687         if (PL_thiswhite) {
3688             /* add preceding whitespace as madprop _ */
3689             CURMAD('_', PL_thiswhite);
3690         }
3691
3692         if (PL_thisstuff) {
3693             /* add quoted material as madprop = */
3694             CURMAD('=', PL_thisstuff);
3695         }
3696
3697         if (PL_thisclose) {
3698             /* add terminating quote as madprop Q */
3699             CURMAD('Q', PL_thisclose);
3700         }
3701     }
3702
3703     /* special processing based on optype */
3704
3705     switch (optype) {
3706
3707     /* opval doesn't need a TOKEN since it can already store mp */
3708     case WORD:
3709     case METHOD:
3710     case FUNCMETH:
3711     case THING:
3712     case PMFUNC:
3713     case PRIVATEREF:
3714     case FUNC0SUB:
3715     case UNIOPSUB:
3716     case LSTOPSUB:
3717         if (pl_yylval.opval)
3718             append_madprops(PL_thismad, pl_yylval.opval, 0);
3719         PL_thismad = 0;
3720         return optype;
3721
3722     /* fake EOF */
3723     case 0:
3724         optype = PEG;
3725         if (PL_endwhite) {
3726             addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
3727             PL_endwhite = 0;
3728         }
3729         break;
3730
3731     case ']':
3732     case '}':
3733         if (PL_faketokens)
3734             break;
3735         /* remember any fake bracket that lexer is about to discard */
3736         if (PL_lex_brackets == 1 &&
3737             ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
3738         {
3739             s = PL_bufptr;
3740             while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3741                 s++;
3742             if (*s == '}') {
3743                 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
3744                 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3745                 PL_thiswhite = 0;
3746                 PL_bufptr = s - 1;
3747                 break;  /* don't bother looking for trailing comment */
3748             }
3749             else
3750                 s = PL_bufptr;
3751         }
3752         if (optype == ']')
3753             break;
3754         /* FALLTHROUGH */
3755
3756     /* attach a trailing comment to its statement instead of next token */
3757     case ';':
3758         if (PL_faketokens)
3759             break;
3760         if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
3761             s = PL_bufptr;
3762             while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3763                 s++;
3764             if (*s == '\n' || *s == '#') {
3765                 while (s < PL_bufend && *s != '\n')
3766                     s++;
3767                 if (s < PL_bufend)
3768                     s++;
3769                 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
3770                 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3771                 PL_thiswhite = 0;
3772                 PL_bufptr = s;
3773             }
3774         }
3775         break;
3776
3777     /* pval */
3778     case LABEL:
3779         break;
3780
3781     /* ival */
3782     default:
3783         break;
3784
3785     }
3786
3787     /* Create new token struct.  Note: opvals return early above. */
3788     pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
3789     PL_thismad = 0;
3790     return optype;
3791 }
3792 #endif
3793
3794 STATIC char *
3795 S_tokenize_use(pTHX_ int is_use, char *s) {
3796     dVAR;
3797
3798     PERL_ARGS_ASSERT_TOKENIZE_USE;
3799
3800     if (PL_expect != XSTATE)
3801         yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
3802                     is_use ? "use" : "no"));
3803     s = SKIPSPACE1(s);
3804     if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
3805         s = force_version(s, TRUE);
3806         if (*s == ';' || *s == '}'
3807                 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
3808             start_force(PL_curforce);
3809             NEXTVAL_NEXTTOKE.opval = NULL;
3810             force_next(WORD);
3811         }
3812         else if (*s == 'v') {
3813             s = force_word(s,WORD,FALSE,TRUE,FALSE);
3814             s = force_version(s, FALSE);
3815         }
3816     }
3817     else {
3818         s = force_word(s,WORD,FALSE,TRUE,FALSE);
3819         s = force_version(s, FALSE);
3820     }
3821     pl_yylval.ival = is_use;
3822     return s;
3823 }
3824 #ifdef DEBUGGING
3825     static const char* const exp_name[] =
3826         { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
3827           "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
3828         };
3829 #endif
3830
3831 /*
3832   yylex
3833
3834   Works out what to call the token just pulled out of the input
3835   stream.  The yacc parser takes care of taking the ops we return and
3836   stitching them into a tree.
3837
3838   Returns:
3839     PRIVATEREF
3840
3841   Structure:
3842       if read an identifier
3843           if we're in a my declaration
3844               croak if they tried to say my($foo::bar)
3845               build the ops for a my() declaration
3846           if it's an access to a my() variable
3847               are we in a sort block?
3848                   croak if my($a); $a <=> $b
3849               build ops for access to a my() variable
3850           if in a dq string, and they've said @foo and we can't find @foo
3851               croak
3852           build ops for a bareword
3853       if we already built the token before, use it.
3854 */
3855
3856
3857 #ifdef __SC__
3858 #pragma segment Perl_yylex
3859 #endif
3860 int
3861 Perl_yylex(pTHX)
3862 {
3863     dVAR;
3864     register char *s = PL_bufptr;
3865     register char *d;
3866     STRLEN len;
3867     bool bof = FALSE;
3868     U32 fake_eof = 0;
3869
3870     /* orig_keyword, gvp, and gv are initialized here because
3871      * jump to the label just_a_word_zero can bypass their
3872      * initialization later. */
3873     I32 orig_keyword = 0;
3874     GV *gv = NULL;
3875     GV **gvp = NULL;
3876
3877     DEBUG_T( {
3878         SV* tmp = newSVpvs("");
3879         PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
3880             (IV)CopLINE(PL_curcop),
3881             lex_state_names[PL_lex_state],
3882             exp_name[PL_expect],
3883             pv_display(tmp, s, strlen(s), 0, 60));
3884         SvREFCNT_dec(tmp);
3885     } );
3886     /* check if there's an identifier for us to look at */
3887     if (PL_pending_ident)
3888         return REPORT(S_pending_ident(aTHX));
3889
3890     /* no identifier pending identification */
3891
3892     switch (PL_lex_state) {
3893 #ifdef COMMENTARY
3894     case LEX_NORMAL:            /* Some compilers will produce faster */
3895     case LEX_INTERPNORMAL:      /* code if we comment these out. */
3896         break;
3897 #endif
3898
3899     /* when we've already built the next token, just pull it out of the queue */
3900     case LEX_KNOWNEXT:
3901 #ifdef PERL_MAD
3902         PL_lasttoke--;
3903         pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
3904         if (PL_madskills) {
3905             PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
3906             PL_nexttoke[PL_lasttoke].next_mad = 0;
3907             if (PL_thismad && PL_thismad->mad_key == '_') {
3908                 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
3909                 PL_thismad->mad_val = 0;
3910                 mad_free(PL_thismad);
3911                 PL_thismad = 0;
3912             }
3913         }
3914         if (!PL_lasttoke) {
3915             PL_lex_state = PL_lex_defer;
3916             PL_expect = PL_lex_expect;
3917             PL_lex_defer = LEX_NORMAL;
3918             if (!PL_nexttoke[PL_lasttoke].next_type)
3919                 return yylex();
3920         }
3921 #else
3922         PL_nexttoke--;
3923         pl_yylval = PL_nextval[PL_nexttoke];
3924         if (!PL_nexttoke) {
3925             PL_lex_state = PL_lex_defer;
3926             PL_expect = PL_lex_expect;
3927             PL_lex_defer = LEX_NORMAL;
3928         }
3929 #endif
3930 #ifdef PERL_MAD
3931         /* FIXME - can these be merged?  */
3932         return(PL_nexttoke[PL_lasttoke].next_type);
3933 #else
3934         return REPORT(PL_nexttype[PL_nexttoke]);
3935 #endif
3936
3937     /* interpolated case modifiers like \L \U, including \Q and \E.
3938        when we get here, PL_bufptr is at the \
3939     */
3940     case LEX_INTERPCASEMOD:
3941 #ifdef DEBUGGING
3942         if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
3943             Perl_croak(aTHX_ "panic: INTERPCASEMOD");
3944 #endif
3945         /* handle \E or end of string */
3946         if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
3947             /* if at a \E */
3948             if (PL_lex_casemods) {
3949                 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3950                 PL_lex_casestack[PL_lex_casemods] = '\0';
3951
3952                 if (PL_bufptr != PL_bufend
3953                     && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3954                     PL_bufptr += 2;
3955                     PL_lex_state = LEX_INTERPCONCAT;
3956 #ifdef PERL_MAD
3957                     if (PL_madskills)
3958                         PL_thistoken = newSVpvs("\\E");
3959 #endif
3960                 }
3961                 return REPORT(')');
3962             }
3963 #ifdef PERL_MAD
3964             while (PL_bufptr != PL_bufend &&
3965               PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
3966                 if (!PL_thiswhite)
3967                     PL_thiswhite = newSVpvs("");
3968                 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
3969                 PL_bufptr += 2;
3970             }
3971 #else
3972             if (PL_bufptr != PL_bufend)
3973                 PL_bufptr += 2;
3974 #endif
3975             PL_lex_state = LEX_INTERPCONCAT;
3976             return yylex();
3977         }
3978         else {
3979             DEBUG_T({ PerlIO_printf(Perl_debug_log,
3980               "### Saw case modifier\n"); });
3981             s = PL_bufptr + 1;
3982             if (s[1] == '\\' && s[2] == 'E') {
3983 #ifdef PERL_MAD
3984                 if (!PL_thiswhite)
3985                     PL_thiswhite = newSVpvs("");
3986                 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
3987 #endif
3988                 PL_bufptr = s + 3;
3989                 PL_lex_state = LEX_INTERPCONCAT;
3990                 return yylex();
3991             }
3992             else {
3993                 I32 tmp;
3994                 if (!PL_madskills) /* when just compiling don't need correct */
3995                     if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
3996                         tmp = *s, *s = s[2], s[2] = (char)tmp;  /* misordered... */
3997                 if ((*s == 'L' || *s == 'U') &&
3998                     (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
3999                     PL_lex_casestack[--PL_lex_casemods] = '\0';
4000                     return REPORT(')');
4001                 }
4002                 if (PL_lex_casemods > 10)
4003                     Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4004                 PL_lex_casestack[PL_lex_casemods++] = *s;
4005                 PL_lex_casestack[PL_lex_casemods] = '\0';
4006                 PL_lex_state = LEX_INTERPCONCAT;
4007                 start_force(PL_curforce);
4008                 NEXTVAL_NEXTTOKE.ival = 0;
4009                 force_next('(');
4010                 start_force(PL_curforce);
4011                 if (*s == 'l')
4012                     NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4013                 else if (*s == 'u')
4014                     NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4015                 else if (*s == 'L')
4016                     NEXTVAL_NEXTTOKE.ival = OP_LC;
4017                 else if (*s == 'U')
4018                     NEXTVAL_NEXTTOKE.ival = OP_UC;
4019                 else if (*s == 'Q')
4020                     NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4021                 else
4022                     Perl_croak(aTHX_ "panic: yylex");
4023                 if (PL_madskills) {
4024                     SV* const tmpsv = newSVpvs("\\ ");
4025                     /* replace the space with the character we want to escape
4026                      */
4027                     SvPVX(tmpsv)[1] = *s;
4028                     curmad('_', tmpsv);
4029                 }
4030                 PL_bufptr = s + 1;
4031             }
4032             force_next(FUNC);
4033             if (PL_lex_starts) {
4034                 s = PL_bufptr;
4035                 PL_lex_starts = 0;
4036 #ifdef PERL_MAD
4037                 if (PL_madskills) {
4038                     if (PL_thistoken)
4039                         sv_free(PL_thistoken);
4040                     PL_thistoken = newSVpvs("");
4041                 }
4042 #endif
4043                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4044                 if (PL_lex_casemods == 1 && PL_lex_inpat)
4045                     OPERATOR(',');
4046                 else
4047                     Aop(OP_CONCAT);
4048             }
4049             else
4050                 return yylex();
4051         }
4052
4053     case LEX_INTERPPUSH:
4054         return REPORT(sublex_push());
4055
4056     case LEX_INTERPSTART:
4057         if (PL_bufptr == PL_bufend)
4058             return REPORT(sublex_done());
4059         DEBUG_T({ PerlIO_printf(Perl_debug_log,
4060               "### Interpolated variable\n"); });
4061         PL_expect = XTERM;
4062         PL_lex_dojoin = (*PL_bufptr == '@');
4063         PL_lex_state = LEX_INTERPNORMAL;
4064         if (PL_lex_dojoin) {
4065             start_force(PL_curforce);
4066             NEXTVAL_NEXTTOKE.ival = 0;
4067             force_next(',');
4068             start_force(PL_curforce);
4069             force_ident("\"", '$');
4070             start_force(PL_curforce);
4071             NEXTVAL_NEXTTOKE.ival = 0;
4072             force_next('$');
4073             start_force(PL_curforce);
4074             NEXTVAL_NEXTTOKE.ival = 0;
4075             force_next('(');
4076             start_force(PL_curforce);
4077             NEXTVAL_NEXTTOKE.ival = OP_JOIN;    /* emulate join($", ...) */
4078             force_next(FUNC);
4079         }
4080         if (PL_lex_starts++) {
4081             s = PL_bufptr;
4082 #ifdef PERL_MAD
4083             if (PL_madskills) {
4084                 if (PL_thistoken)
4085                     sv_free(PL_thistoken);
4086                 PL_thistoken = newSVpvs("");
4087             }
4088 #endif
4089             /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4090             if (!PL_lex_casemods && PL_lex_inpat)
4091                 OPERATOR(',');
4092             else
4093                 Aop(OP_CONCAT);
4094         }
4095         return yylex();
4096
4097     case LEX_INTERPENDMAYBE:
4098         if (intuit_more(PL_bufptr)) {
4099             PL_lex_state = LEX_INTERPNORMAL;    /* false alarm, more expr */
4100             break;
4101         }
4102         /* FALL THROUGH */
4103
4104     case LEX_INTERPEND:
4105         if (PL_lex_dojoin) {
4106             PL_lex_dojoin = FALSE;
4107             PL_lex_state = LEX_INTERPCONCAT;
4108 #ifdef PERL_MAD
4109             if (PL_madskills) {
4110                 if (PL_thistoken)
4111                     sv_free(PL_thistoken);
4112                 PL_thistoken = newSVpvs("");
4113             }
4114 #endif
4115             return REPORT(')');
4116         }
4117         if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
4118             && SvEVALED(PL_lex_repl))
4119         {
4120             if (PL_bufptr != PL_bufend)
4121                 Perl_croak(aTHX_ "Bad evalled substitution pattern");
4122             PL_lex_repl = NULL;
4123         }
4124         /* FALLTHROUGH */
4125     case LEX_INTERPCONCAT:
4126 #ifdef DEBUGGING
4127         if (PL_lex_brackets)
4128             Perl_croak(aTHX_ "panic: INTERPCONCAT");
4129 #endif
4130         if (PL_bufptr == PL_bufend)
4131             return REPORT(sublex_done());
4132
4133         if (SvIVX(PL_linestr) == '\'') {
4134             SV *sv = newSVsv(PL_linestr);
4135             if (!PL_lex_inpat)
4136                 sv = tokeq(sv);
4137             else if ( PL_hints & HINT_NEW_RE )
4138                 sv = new_constant(NULL, 0, "qr", sv, sv, "q", 1);
4139             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
4140             s = PL_bufend;
4141         }
4142         else {
4143             s = scan_const(PL_bufptr);
4144             if (*s == '\\')
4145                 PL_lex_state = LEX_INTERPCASEMOD;
4146             else
4147                 PL_lex_state = LEX_INTERPSTART;
4148         }
4149
4150         if (s != PL_bufptr) {
4151             start_force(PL_curforce);
4152             if (PL_madskills) {
4153                 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
4154             }
4155             NEXTVAL_NEXTTOKE = pl_yylval;
4156             PL_expect = XTERM;
4157             force_next(THING);
4158             if (PL_lex_starts++) {
4159 #ifdef PERL_MAD
4160                 if (PL_madskills) {
4161                     if (PL_thistoken)
4162                         sv_free(PL_thistoken);
4163                     PL_thistoken = newSVpvs("");
4164                 }
4165 #endif
4166                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4167                 if (!PL_lex_casemods && PL_lex_inpat)
4168                     OPERATOR(',');
4169                 else
4170                     Aop(OP_CONCAT);
4171             }
4172             else {
4173                 PL_bufptr = s;
4174                 return yylex();
4175             }
4176         }
4177
4178         return yylex();
4179     case LEX_FORMLINE:
4180         PL_lex_state = LEX_NORMAL;
4181         s = scan_formline(PL_bufptr);
4182         if (!PL_lex_formbrack)
4183             goto rightbracket;
4184         OPERATOR(';');
4185     }
4186
4187     s = PL_bufptr;
4188     PL_oldoldbufptr = PL_oldbufptr;
4189     PL_oldbufptr = s;
4190
4191   retry:
4192 #ifdef PERL_MAD
4193     if (PL_thistoken) {
4194         sv_free(PL_thistoken);
4195         PL_thistoken = 0;
4196     }
4197     PL_realtokenstart = s - SvPVX(PL_linestr);  /* assume but undo on ws */
4198 #endif
4199     switch (*s) {
4200     default:
4201         if (isIDFIRST_lazy_if(s,UTF))
4202             goto keylookup;
4203         {
4204         unsigned char c = *s;
4205         len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
4206         if (len > UNRECOGNIZED_PRECEDE_COUNT) {
4207             d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
4208         } else {
4209             d = PL_linestart;
4210         }
4211         *s = '\0';
4212         Perl_croak(aTHX_ "Unrecognized character \\x%02X; marked by <-- HERE after %s<-- HERE near column %d", c, d, (int) len + 1);
4213     }
4214     case 4:
4215     case 26:
4216         goto fake_eof;                  /* emulate EOF on ^D or ^Z */
4217     case 0:
4218 #ifdef PERL_MAD
4219         if (PL_madskills)
4220             PL_faketokens = 0;
4221 #endif
4222         if (!PL_rsfp) {
4223             PL_last_uni = 0;
4224             PL_last_lop = 0;
4225             if (PL_lex_brackets) {
4226                 yyerror((const char *)
4227                         (PL_lex_formbrack
4228                          ? "Format not terminated"
4229                          : "Missing right curly or square bracket"));
4230             }
4231             DEBUG_T( { PerlIO_printf(Perl_debug_log,
4232                         "### Tokener got EOF\n");
4233             } );
4234             TOKEN(0);
4235         }
4236         if (s++ < PL_bufend)
4237             goto retry;                 /* ignore stray nulls */
4238         PL_last_uni = 0;
4239         PL_last_lop = 0;
4240         if (!PL_in_eval && !PL_preambled) {
4241             PL_preambled = TRUE;
4242 #ifdef PERL_MAD
4243             if (PL_madskills)
4244                 PL_faketokens = 1;
4245 #endif
4246             if (PL_perldb) {
4247                 /* Generate a string of Perl code to load the debugger.
4248                  * If PERL5DB is set, it will return the contents of that,
4249                  * otherwise a compile-time require of perl5db.pl.  */
4250
4251                 const char * const pdb = PerlEnv_getenv("PERL5DB");
4252
4253                 if (pdb) {
4254                     sv_setpv(PL_linestr, pdb);
4255                     sv_catpvs(PL_linestr,";");
4256                 } else {
4257                     SETERRNO(0,SS_NORMAL);
4258                     sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
4259                 }
4260             } else
4261                 sv_setpvs(PL_linestr,"");
4262             if (PL_preambleav) {
4263                 SV **svp = AvARRAY(PL_preambleav);
4264                 SV **const end = svp + AvFILLp(PL_preambleav);
4265                 while(svp <= end) {
4266                     sv_catsv(PL_linestr, *svp);
4267                     ++svp;
4268                     sv_catpvs(PL_linestr, ";");
4269                 }
4270                 sv_free(MUTABLE_SV(PL_preambleav));
4271                 PL_preambleav = NULL;
4272             }
4273             if (PL_minus_E)
4274                 sv_catpvs(PL_linestr,
4275                           "use feature ':5." STRINGIFY(PERL_VERSION) "';");
4276             if (PL_minus_n || PL_minus_p) {
4277                 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
4278                 if (PL_minus_l)
4279                     sv_catpvs(PL_linestr,"chomp;");
4280                 if (PL_minus_a) {
4281                     if (PL_minus_F) {
4282                         if ((*PL_splitstr == '/' || *PL_splitstr == '\''
4283                              || *PL_splitstr == '"')
4284                               && strchr(PL_splitstr + 1, *PL_splitstr))
4285                             Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
4286                         else {
4287                             /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
4288                                bytes can be used as quoting characters.  :-) */
4289                             const char *splits = PL_splitstr;
4290                             sv_catpvs(PL_linestr, "our @F=split(q\0");
4291                             do {
4292                                 /* Need to \ \s  */
4293                                 if (*splits == '\\')
4294                                     sv_catpvn(PL_linestr, splits, 1);
4295                                 sv_catpvn(PL_linestr, splits, 1);
4296                             } while (*splits++);
4297                             /* This loop will embed the trailing NUL of
4298                                PL_linestr as the last thing it does before
4299                                terminating.  */
4300                             sv_catpvs(PL_linestr, ");");
4301                         }
4302                     }
4303                     else
4304                         sv_catpvs(PL_linestr,"our @F=split(' ');");
4305                 }
4306             }
4307             sv_catpvs(PL_linestr, "\n");
4308             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4309             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4310             PL_last_lop = PL_last_uni = NULL;
4311             if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
4312                 update_debugger_info(PL_linestr, NULL, 0);
4313             goto retry;
4314         }
4315         do {
4316             fake_eof = 0;
4317             bof = PL_rsfp ? TRUE : FALSE;
4318             if (0) {
4319               fake_eof:
4320                 fake_eof = LEX_FAKE_EOF;
4321             }
4322             PL_bufptr = PL_bufend;
4323             CopLINE_inc(PL_curcop);
4324             if (!lex_next_chunk(fake_eof)) {
4325                 CopLINE_dec(PL_curcop);
4326                 s = PL_bufptr;
4327                 TOKEN(';');     /* not infinite loop because rsfp is NULL now */
4328             }
4329             CopLINE_dec(PL_curcop);
4330 #ifdef PERL_MAD
4331             if (!PL_rsfp)
4332                 PL_realtokenstart = -1;
4333 #endif
4334             s = PL_bufptr;
4335             /* If it looks like the start of a BOM or raw UTF-16,
4336              * check if it in fact is. */
4337             if (bof && PL_rsfp &&
4338                      (*s == 0 ||
4339                       *(U8*)s == 0xEF ||
4340                       *(U8*)s >= 0xFE ||
4341                       s[1] == 0)) {
4342                 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
4343                 if (bof) {
4344                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4345                     s = swallow_bom((U8*)s);
4346                 }
4347             }
4348             if (PL_doextract) {
4349                 /* Incest with pod. */
4350 #ifdef PERL_MAD
4351                 if (PL_madskills)
4352                     sv_catsv(PL_thiswhite, PL_linestr);
4353 #endif
4354                 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
4355                     sv_setpvs(PL_linestr, "");
4356                     PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4357                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4358                     PL_last_lop = PL_last_uni = NULL;
4359                     PL_doextract = FALSE;
4360                 }
4361             }
4362             if (PL_rsfp)
4363                 incline(s);
4364         } while (PL_doextract);
4365         PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
4366         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4367         PL_last_lop = PL_last_uni = NULL;
4368         if (CopLINE(PL_curcop) == 1) {
4369             while (s < PL_bufend && isSPACE(*s))
4370                 s++;
4371             if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
4372                 s++;
4373 #ifdef PERL_MAD
4374             if (PL_madskills)
4375                 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
4376 #endif
4377             d = NULL;
4378             if (!PL_in_eval) {
4379                 if (*s == '#' && *(s+1) == '!')
4380                     d = s + 2;
4381 #ifdef ALTERNATE_SHEBANG
4382                 else {
4383                     static char const as[] = ALTERNATE_SHEBANG;
4384                     if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
4385                         d = s + (sizeof(as) - 1);
4386                 }
4387 #endif /* ALTERNATE_SHEBANG */
4388             }
4389             if (d) {
4390                 char *ipath;
4391                 char *ipathend;
4392
4393                 while (isSPACE(*d))
4394                     d++;
4395                 ipath = d;
4396                 while (*d && !isSPACE(*d))
4397                     d++;
4398                 ipathend = d;
4399
4400 #ifdef ARG_ZERO_IS_SCRIPT
4401                 if (ipathend > ipath) {
4402                     /*
4403                      * HP-UX (at least) sets argv[0] to the script name,
4404                      * which makes $^X incorrect.  And Digital UNIX and Linux,
4405                      * at least, set argv[0] to the basename of the Perl
4406                      * interpreter. So, having found "#!", we'll set it right.
4407                      */
4408                     SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
4409                                                     SVt_PV)); /* $^X */
4410                     assert(SvPOK(x) || SvGMAGICAL(x));
4411                     if (sv_eq(x, CopFILESV(PL_curcop))) {
4412                         sv_setpvn(x, ipath, ipathend - ipath);
4413                         SvSETMAGIC(x);
4414                     }
4415                     else {
4416                         STRLEN blen;
4417                         STRLEN llen;
4418                         const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
4419                         const char * const lstart = SvPV_const(x,llen);
4420                         if (llen < blen) {
4421                             bstart += blen - llen;
4422                             if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
4423                                 sv_setpvn(x, ipath, ipathend - ipath);
4424                                 SvSETMAGIC(x);
4425                             }
4426                         }
4427                     }
4428                     TAINT_NOT;  /* $^X is always tainted, but that's OK */
4429                 }
4430 #endif /* ARG_ZERO_IS_SCRIPT */
4431
4432                 /*
4433                  * Look for options.
4434                  */
4435                 d = instr(s,"perl -");
4436                 if (!d) {
4437                     d = instr(s,"perl");
4438 #if defined(DOSISH)
4439                     /* avoid getting into infinite loops when shebang
4440                      * line contains "Perl" rather than "perl" */
4441                     if (!d) {
4442                         for (d = ipathend-4; d >= ipath; --d) {
4443                             if ((*d == 'p' || *d == 'P')
4444                                 && !ibcmp(d, "perl", 4))
4445                             {
4446                                 break;
4447                             }
4448                         }
4449                         if (d < ipath)
4450                             d = NULL;
4451                     }
4452 #endif
4453                 }
4454 #ifdef ALTERNATE_SHEBANG
4455                 /*
4456                  * If the ALTERNATE_SHEBANG on this system starts with a
4457                  * character that can be part of a Perl expression, then if
4458                  * we see it but not "perl", we're probably looking at the
4459                  * start of Perl code, not a request to hand off to some
4460                  * other interpreter.  Similarly, if "perl" is there, but
4461                  * not in the first 'word' of the line, we assume the line
4462                  * contains the start of the Perl program.
4463                  */
4464                 if (d && *s != '#') {
4465                     const char *c = ipath;
4466                     while (*c && !strchr("; \t\r\n\f\v#", *c))
4467                         c++;
4468                     if (c < d)
4469                         d = NULL;       /* "perl" not in first word; ignore */
4470                     else
4471                         *s = '#';       /* Don't try to parse shebang line */
4472                 }
4473 #endif /* ALTERNATE_SHEBANG */
4474                 if (!d &&
4475                     *s == '#' &&
4476                     ipathend > ipath &&
4477                     !PL_minus_c &&
4478                     !instr(s,"indir") &&
4479                     instr(PL_origargv[0],"perl"))
4480                 {
4481                     dVAR;
4482                     char **newargv;
4483
4484                     *ipathend = '\0';
4485                     s = ipathend + 1;
4486                     while (s < PL_bufend && isSPACE(*s))
4487                         s++;
4488                     if (s < PL_bufend) {
4489                         Newx(newargv,PL_origargc+3,char*);
4490                         newargv[1] = s;
4491                         while (s < PL_bufend && !isSPACE(*s))
4492                             s++;
4493                         *s = '\0';
4494                         Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
4495                     }
4496                     else
4497                         newargv = PL_origargv;
4498                     newargv[0] = ipath;
4499                     PERL_FPU_PRE_EXEC
4500                     PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
4501                     PERL_FPU_POST_EXEC
4502                     Perl_croak(aTHX_ "Can't exec %s", ipath);
4503                 }
4504                 if (d) {
4505                     while (*d && !isSPACE(*d))
4506                         d++;
4507                     while (SPACE_OR_TAB(*d))
4508                         d++;
4509
4510                     if (*d++ == '-') {
4511                         const bool switches_done = PL_doswitches;
4512                         const U32 oldpdb = PL_perldb;
4513                         const bool oldn = PL_minus_n;
4514                         const bool oldp = PL_minus_p;
4515                         const char *d1 = d;
4516
4517                         do {
4518                             bool baduni = FALSE;
4519                             if (*d1 == 'C') {
4520                                 const char *d2 = d1 + 1;
4521                                 if (parse_unicode_opts((const char **)&d2)
4522                                     != PL_unicode)
4523                                     baduni = TRUE;
4524                             }
4525                             if (baduni || *d1 == 'M' || *d1 == 'm') {
4526                                 const char * const m = d1;
4527                                 while (*d1 && !isSPACE(*d1))
4528                                     d1++;
4529                                 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
4530                                       (int)(d1 - m), m);
4531                             }
4532                             d1 = moreswitches(d1);
4533                         } while (d1);
4534                         if (PL_doswitches && !switches_done) {
4535                             int argc = PL_origargc;
4536                             char **argv = PL_origargv;
4537                             do {
4538                                 argc--,argv++;
4539                             } while (argc && argv[0][0] == '-' && argv[0][1]);
4540                             init_argv_symbols(argc,argv);
4541                         }
4542                         if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
4543                             ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
4544                               /* if we have already added "LINE: while (<>) {",
4545                                  we must not do it again */
4546                         {
4547                             sv_setpvs(PL_linestr, "");
4548                             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4549                             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4550                             PL_last_lop = PL_last_uni = NULL;
4551                             PL_preambled = FALSE;
4552                             if (PERLDB_LINE || PERLDB_SAVESRC)
4553                                 (void)gv_fetchfile(PL_origfilename);
4554                             goto retry;
4555                         }
4556                     }
4557                 }
4558             }
4559         }
4560         if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4561             PL_bufptr = s;
4562             PL_lex_state = LEX_FORMLINE;
4563             return yylex();
4564         }
4565         goto retry;
4566     case '\r':
4567 #ifdef PERL_STRICT_CR
4568         Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4569         Perl_croak(aTHX_
4570       "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
4571 #endif
4572     case ' ': case '\t': case '\f': case 013:
4573 #ifdef PERL_MAD
4574         PL_realtokenstart = -1;
4575         if (!PL_thiswhite)
4576             PL_thiswhite = newSVpvs("");
4577         sv_catpvn(PL_thiswhite, s, 1);
4578 #endif
4579         s++;
4580         goto retry;
4581     case '#':
4582     case '\n':
4583 #ifdef PERL_MAD
4584         PL_realtokenstart = -1;
4585         if (PL_madskills)
4586             PL_faketokens = 0;
4587 #endif
4588         if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
4589             if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
4590                 /* handle eval qq[#line 1 "foo"\n ...] */
4591                 CopLINE_dec(PL_curcop);
4592                 incline(s);
4593             }
4594             if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
4595                 s = SKIPSPACE0(s);
4596                 if (!PL_in_eval || PL_rsfp)
4597                     incline(s);
4598             }
4599             else {
4600                 d = s;
4601                 while (d < PL_bufend && *d != '\n')
4602                     d++;
4603                 if (d < PL_bufend)
4604                     d++;
4605                 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
4606                   Perl_croak(aTHX_ "panic: input overflow");
4607 #ifdef PERL_MAD
4608                 if (PL_madskills)
4609                     PL_thiswhite = newSVpvn(s, d - s);
4610 #endif
4611                 s = d;
4612                 incline(s);
4613             }
4614             if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4615                 PL_bufptr = s;
4616                 PL_lex_state = LEX_FORMLINE;
4617                 return yylex();
4618             }
4619         }
4620         else {
4621 #ifdef PERL_MAD
4622             if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
4623                 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
4624                     PL_faketokens = 0;
4625                     s = SKIPSPACE0(s);
4626                     TOKEN(PEG); /* make sure any #! line is accessible */
4627                 }
4628                 s = SKIPSPACE0(s);
4629             }
4630             else {
4631 /*              if (PL_madskills && PL_lex_formbrack) { */
4632                     d = s;
4633                     while (d < PL_bufend && *d != '\n')
4634                         d++;
4635                     if (d < PL_bufend)
4636                         d++;
4637                     else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
4638                       Perl_croak(aTHX_ "panic: input overflow");
4639                     if (PL_madskills && CopLINE(PL_curcop) >= 1) {
4640                         if (!PL_thiswhite)
4641                             PL_thiswhite = newSVpvs("");
4642                         if (CopLINE(PL_curcop) == 1) {
4643                             sv_setpvs(PL_thiswhite, "");
4644                             PL_faketokens = 0;
4645                         }
4646                         sv_catpvn(PL_thiswhite, s, d - s);
4647                     }
4648                     s = d;
4649 /*              }
4650                 *s = '\0';
4651                 PL_bufend = s; */
4652             }
4653 #else
4654             *s = '\0';
4655             PL_bufend = s;
4656 #endif
4657         }
4658         goto retry;
4659     case '-':
4660         if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
4661             I32 ftst = 0;
4662             char tmp;
4663
4664             s++;
4665             PL_bufptr = s;
4666             tmp = *s++;
4667
4668             while (s < PL_bufend && SPACE_OR_TAB(*s))
4669                 s++;
4670
4671             if (strnEQ(s,"=>",2)) {
4672                 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
4673                 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
4674                 OPERATOR('-');          /* unary minus */
4675             }
4676             PL_last_uni = PL_oldbufptr;
4677             switch (tmp) {
4678             case 'r': ftst = OP_FTEREAD;        break;
4679             case 'w': ftst = OP_FTEWRITE;       break;
4680             case 'x': ftst = OP_FTEEXEC;        break;
4681             case 'o': ftst = OP_FTEOWNED;       break;
4682             case 'R': ftst = OP_FTRREAD;        break;
4683             case 'W': ftst = OP_FTRWRITE;       break;
4684             case 'X': ftst = OP_FTREXEC;        break;
4685             case 'O': ftst = OP_FTROWNED;       break;
4686             case 'e': ftst = OP_FTIS;           break;
4687             case 'z': ftst = OP_FTZERO;         break;
4688             case 's': ftst = OP_FTSIZE;         break;
4689             case 'f': ftst = OP_FTFILE;         break;
4690             case 'd': ftst = OP_FTDIR;          break;
4691             case 'l': ftst = OP_FTLINK;         break;
4692             case 'p': ftst = OP_FTPIPE;         break;
4693             case 'S': ftst = OP_FTSOCK;         break;
4694             case 'u': ftst = OP_FTSUID;         break;
4695             case 'g': ftst = OP_FTSGID;         break;
4696             case 'k': ftst = OP_FTSVTX;         break;
4697             case 'b': ftst = OP_FTBLK;          break;
4698             case 'c': ftst = OP_FTCHR;          break;
4699             case 't': ftst = OP_FTTTY;          break;
4700             case 'T': ftst = OP_FTTEXT;         break;
4701             case 'B': ftst = OP_FTBINARY;       break;
4702             case 'M': case 'A': case 'C':
4703                 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
4704                 switch (tmp) {
4705                 case 'M': ftst = OP_FTMTIME;    break;
4706                 case 'A': ftst = OP_FTATIME;    break;
4707                 case 'C': ftst = OP_FTCTIME;    break;
4708                 default:                        break;
4709                 }
4710                 break;
4711             default:
4712                 break;
4713             }
4714             if (ftst) {
4715                 PL_last_lop_op = (OPCODE)ftst;
4716                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
4717                         "### Saw file test %c\n", (int)tmp);
4718                 } );
4719                 FTST(ftst);
4720             }
4721             else {
4722                 /* Assume it was a minus followed by a one-letter named
4723                  * subroutine call (or a -bareword), then. */
4724                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
4725                         "### '-%c' looked like a file test but was not\n",
4726                         (int) tmp);
4727                 } );
4728                 s = --PL_bufptr;
4729             }
4730         }
4731         {
4732             const char tmp = *s++;
4733             if (*s == tmp) {
4734                 s++;
4735                 if (PL_expect == XOPERATOR)
4736                     TERM(POSTDEC);
4737                 else
4738                     OPERATOR(PREDEC);
4739             }
4740             else if (*s == '>') {
4741                 s++;
4742                 s = SKIPSPACE1(s);
4743                 if (isIDFIRST_lazy_if(s,UTF)) {
4744                     s = force_word(s,METHOD,FALSE,TRUE,FALSE);
4745                     TOKEN(ARROW);
4746                 }
4747                 else if (*s == '$')
4748                     OPERATOR(ARROW);
4749                 else
4750                     TERM(ARROW);
4751             }
4752             if (PL_expect == XOPERATOR)
4753                 Aop(OP_SUBTRACT);
4754             else {
4755                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
4756                     check_uni();
4757                 OPERATOR('-');          /* unary minus */
4758             }
4759         }
4760
4761     case '+':
4762         {
4763             const char tmp = *s++;
4764             if (*s == tmp) {
4765                 s++;
4766                 if (PL_expect == XOPERATOR)
4767                     TERM(POSTINC);
4768                 else
4769                     OPERATOR(PREINC);
4770             }
4771             if (PL_expect == XOPERATOR)
4772                 Aop(OP_ADD);
4773             else {
4774                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
4775                     check_uni();
4776                 OPERATOR('+');
4777             }
4778         }
4779
4780     case '*':
4781         if (PL_expect != XOPERATOR) {
4782             s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
4783             PL_expect = XOPERATOR;
4784             force_ident(PL_tokenbuf, '*');
4785             if (!*PL_tokenbuf)
4786                 PREREF('*');
4787             TERM('*');
4788         }
4789         s++;
4790         if (*s == '*') {
4791             s++;
4792             PWop(OP_POW);
4793         }
4794         Mop(OP_MULTIPLY);
4795
4796     case '%':
4797         if (PL_expect == XOPERATOR) {
4798             ++s;
4799             Mop(OP_MODULO);
4800         }
4801         PL_tokenbuf[0] = '%';
4802         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
4803                 sizeof PL_tokenbuf - 1, FALSE);
4804         if (!PL_tokenbuf[1]) {
4805             PREREF('%');
4806         }
4807         PL_pending_ident = '%';
4808         TERM('%');
4809
4810     case '^':
4811         s++;
4812         BOop(OP_BIT_XOR);
4813     case '[':
4814         PL_lex_brackets++;
4815         {
4816             const char tmp = *s++;
4817             OPERATOR(tmp);
4818         }
4819     case '~':
4820         if (s[1] == '~'
4821             && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
4822         {
4823             s += 2;
4824             Eop(OP_SMARTMATCH);
4825         }
4826     case ',':
4827         {
4828             const char tmp = *s++;
4829             OPERATOR(tmp);
4830         }
4831     case ':':
4832         if (s[1] == ':') {
4833             len = 0;
4834             goto just_a_word_zero_gv;
4835         }
4836         s++;
4837         switch (PL_expect) {
4838             OP *attrs;
4839 #ifdef PERL_MAD
4840             I32 stuffstart;
4841 #endif
4842         case XOPERATOR:
4843             if (!PL_in_my || PL_lex_state != LEX_NORMAL)
4844                 break;
4845             PL_bufptr = s;      /* update in case we back off */
4846             if (*s == '=') {
4847                 deprecate(":= for an empty attribute list");
4848             }
4849             goto grabattrs;
4850         case XATTRBLOCK:
4851             PL_expect = XBLOCK;
4852             goto grabattrs;
4853         case XATTRTERM:
4854             PL_expect = XTERMBLOCK;
4855          grabattrs:
4856 #ifdef PERL_MAD
4857             stuffstart = s - SvPVX(PL_linestr) - 1;
4858 #endif
4859             s = PEEKSPACE(s);
4860             attrs = NULL;
4861             while (isIDFIRST_lazy_if(s,UTF)) {
4862                 I32 tmp;
4863                 SV *sv;
4864                 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
4865                 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
4866                     if (tmp < 0) tmp = -tmp;
4867                     switch (tmp) {
4868                     case KEY_or:
4869                     case KEY_and:
4870                     case KEY_for:
4871                     case KEY_foreach:
4872                     case KEY_unless:
4873                     case KEY_if:
4874                     case KEY_while:
4875                     case KEY_until:
4876                         goto got_attrs;
4877                     default:
4878                         break;
4879                     }
4880                 }
4881                 sv = newSVpvn(s, len);
4882                 if (*d == '(') {
4883                     d = scan_str(d,TRUE,TRUE);
4884                     if (!d) {
4885                         /* MUST advance bufptr here to avoid bogus
4886                            "at end of line" context messages from yyerror().
4887                          */
4888                         PL_bufptr = s + len;
4889                         yyerror("Unterminated attribute parameter in attribute list");
4890                         if (attrs)
4891                             op_free(attrs);
4892                         sv_free(sv);
4893                         return REPORT(0);       /* EOF indicator */
4894                     }
4895                 }
4896                 if (PL_lex_stuff) {
4897                     sv_catsv(sv, PL_lex_stuff);
4898                     attrs = append_elem(OP_LIST, attrs,
4899                                         newSVOP(OP_CONST, 0, sv));
4900                     SvREFCNT_dec(PL_lex_stuff);
4901                     PL_lex_stuff = NULL;
4902                 }
4903                 else {
4904                     if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
4905                         sv_free(sv);
4906                         if (PL_in_my == KEY_our) {
4907                             deprecate(":unique");
4908                         }
4909                         else
4910                             Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
4911                     }
4912
4913                     /* NOTE: any CV attrs applied here need to be part of
4914                        the CVf_BUILTIN_ATTRS define in cv.h! */
4915                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
4916                         sv_free(sv);
4917                         CvLVALUE_on(PL_compcv);
4918                     }
4919                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
4920                         sv_free(sv);
4921                         deprecate(":locked");
4922                     }
4923                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
4924                         sv_free(sv);
4925                         CvMETHOD_on(PL_compcv);
4926                     }
4927                     /* After we've set the flags, it could be argued that
4928                        we don't need to do the attributes.pm-based setting
4929                        process, and shouldn't bother appending recognized
4930                        flags.  To experiment with that, uncomment the
4931                        following "else".  (Note that's already been
4932                        uncommented.  That keeps the above-applied built-in
4933                        attributes from being intercepted (and possibly
4934                        rejected) by a package's attribute routines, but is
4935                        justified by the performance win for the common case
4936                        of applying only built-in attributes.) */
4937                     else
4938                         attrs = append_elem(OP_LIST, attrs,
4939                                             newSVOP(OP_CONST, 0,
4940                                                     sv));
4941                 }
4942                 s = PEEKSPACE(d);
4943                 if (*s == ':' && s[1] != ':')
4944                     s = PEEKSPACE(s+1);
4945                 else if (s == d)
4946                     break;      /* require real whitespace or :'s */
4947                 /* XXX losing whitespace on sequential attributes here */
4948             }
4949             {
4950                 const char tmp
4951                     = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
4952                 if (*s != ';' && *s != '}' && *s != tmp
4953                     && (tmp != '=' || *s != ')')) {
4954                     const char q = ((*s == '\'') ? '"' : '\'');
4955                     /* If here for an expression, and parsed no attrs, back
4956                        off. */
4957                     if (tmp == '=' && !attrs) {
4958                         s = PL_bufptr;
4959                         break;
4960                     }
4961                     /* MUST advance bufptr here to avoid bogus "at end of line"
4962                        context messages from yyerror().
4963                     */
4964                     PL_bufptr = s;
4965                     yyerror( (const char *)
4966                              (*s
4967                               ? Perl_form(aTHX_ "Invalid separator character "
4968                                           "%c%c%c in attribute list", q, *s, q)
4969                               : "Unterminated attribute list" ) );
4970                     if (attrs)
4971                         op_free(attrs);
4972                     OPERATOR(':');
4973                 }
4974             }
4975         got_attrs:
4976             if (attrs) {
4977                 start_force(PL_curforce);
4978                 NEXTVAL_NEXTTOKE.opval = attrs;
4979                 CURMAD('_', PL_nextwhite);
4980                 force_next(THING);
4981             }
4982 #ifdef PERL_MAD
4983             if (PL_madskills) {
4984                 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
4985                                      (s - SvPVX(PL_linestr)) - stuffstart);
4986             }
4987 #endif
4988             TOKEN(COLONATTR);
4989         }
4990         OPERATOR(':');
4991     case '(':
4992         s++;
4993         if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
4994             PL_oldbufptr = PL_oldoldbufptr;             /* allow print(STDOUT 123) */
4995         else
4996             PL_expect = XTERM;
4997         s = SKIPSPACE1(s);
4998         TOKEN('(');
4999     case ';':
5000         CLINE;
5001         {
5002             const char tmp = *s++;
5003             OPERATOR(tmp);
5004         }
5005     case ')':
5006         {
5007             const char tmp = *s++;
5008             s = SKIPSPACE1(s);
5009             if (*s == '{')
5010                 PREBLOCK(tmp);
5011             TERM(tmp);
5012         }
5013     case ']':
5014         s++;
5015         if (PL_lex_brackets <= 0)
5016             yyerror("Unmatched right square bracket");
5017         else
5018             --PL_lex_brackets;
5019         if (PL_lex_state == LEX_INTERPNORMAL) {
5020             if (PL_lex_brackets == 0) {
5021                 if (*s == '-' && s[1] == '>')
5022                     PL_lex_state = LEX_INTERPENDMAYBE;
5023                 else if (*s != '[' && *s != '{')
5024                     PL_lex_state = LEX_INTERPEND;
5025             }
5026         }
5027         TERM(']');
5028     case '{':
5029       leftbracket:
5030         s++;
5031         if (PL_lex_brackets > 100) {
5032             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5033         }
5034         switch (PL_expect) {
5035         case XTERM:
5036             if (PL_lex_formbrack) {
5037                 s--;
5038                 PRETERMBLOCK(DO);
5039             }
5040             if (PL_oldoldbufptr == PL_last_lop)
5041                 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5042             else
5043                 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5044             OPERATOR(HASHBRACK);
5045         case XOPERATOR:
5046             while (s < PL_bufend && SPACE_OR_TAB(*s))
5047                 s++;
5048             d = s;
5049             PL_tokenbuf[0] = '\0';
5050             if (d < PL_bufend && *d == '-') {
5051                 PL_tokenbuf[0] = '-';
5052                 d++;
5053                 while (d < PL_bufend && SPACE_OR_TAB(*d))
5054                     d++;
5055             }
5056             if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
5057                 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
5058                               FALSE, &len);
5059                 while (d < PL_bufend && SPACE_OR_TAB(*d))
5060                     d++;
5061                 if (*d == '}') {
5062                     const char minus = (PL_tokenbuf[0] == '-');
5063                     s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
5064                     if (minus)
5065                         force_next('-');
5066                 }
5067             }
5068             /* FALL THROUGH */
5069         case XATTRBLOCK:
5070         case XBLOCK:
5071             PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
5072             PL_expect = XSTATE;
5073             break;
5074         case XATTRTERM:
5075         case XTERMBLOCK:
5076             PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5077             PL_expect = XSTATE;
5078             break;
5079         default: {
5080                 const char *t;
5081                 if (PL_oldoldbufptr == PL_last_lop)
5082                     PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5083                 else
5084                     PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5085                 s = SKIPSPACE1(s);
5086                 if (*s == '}') {
5087                     if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5088                         PL_expect = XTERM;
5089                         /* This hack is to get the ${} in the message. */
5090                         PL_bufptr = s+1;
5091                         yyerror("syntax error");
5092                         break;
5093                     }
5094                     OPERATOR(HASHBRACK);
5095                 }
5096                 /* This hack serves to disambiguate a pair of curlies
5097                  * as being a block or an anon hash.  Normally, expectation
5098                  * determines that, but in cases where we're not in a
5099                  * position to expect anything in particular (like inside
5100                  * eval"") we have to resolve the ambiguity.  This code
5101                  * covers the case where the first term in the curlies is a
5102                  * quoted string.  Most other cases need to be explicitly
5103                  * disambiguated by prepending a "+" before the opening
5104                  * curly in order to force resolution as an anon hash.
5105                  *
5106                  * XXX should probably propagate the outer expectation
5107                  * into eval"" to rely less on this hack, but that could
5108                  * potentially break current behavior of eval"".
5109                  * GSAR 97-07-21
5110                  */
5111                 t = s;
5112                 if (*s == '\'' || *s == '"' || *s == '`') {
5113                     /* common case: get past first string, handling escapes */
5114                     for (t++; t < PL_bufend && *t != *s;)
5115                         if (*t++ == '\\' && (*t == '\\' || *t == *s))
5116                             t++;
5117                     t++;
5118                 }
5119                 else if (*s == 'q') {
5120                     if (++t < PL_bufend
5121                         && (!isALNUM(*t)
5122                             || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
5123                                 && !isALNUM(*t))))
5124                     {
5125                         /* skip q//-like construct */
5126                         const char *tmps;
5127                         char open, close, term;
5128                         I32 brackets = 1;
5129
5130                         while (t < PL_bufend && isSPACE(*t))
5131                             t++;
5132                         /* check for q => */
5133                         if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5134                             OPERATOR(HASHBRACK);
5135                         }
5136                         term = *t;
5137                         open = term;
5138                         if (term && (tmps = strchr("([{< )]}> )]}>",term)))
5139                             term = tmps[5];
5140                         close = term;
5141                         if (open == close)
5142                             for (t++; t < PL_bufend; t++) {
5143                                 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
5144                                     t++;
5145                                 else if (*t == open)
5146                                     break;
5147                             }
5148                         else {
5149                             for (t++; t < PL_bufend; t++) {
5150                                 if (*t == '\\' && t+1 < PL_bufend)
5151                                     t++;
5152                                 else if (*t == close && --brackets <= 0)
5153                                     break;
5154                                 else if (*t == open)
5155                                     brackets++;
5156                             }
5157                         }
5158                         t++;
5159                     }
5160                     else
5161                         /* skip plain q word */
5162                         while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5163                              t += UTF8SKIP(t);
5164                 }
5165                 else if (isALNUM_lazy_if(t,UTF)) {
5166                     t += UTF8SKIP(t);
5167                     while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5168                          t += UTF8SKIP(t);
5169                 }
5170                 while (t < PL_bufend && isSPACE(*t))
5171                     t++;
5172                 /* if comma follows first term, call it an anon hash */
5173                 /* XXX it could be a comma expression with loop modifiers */
5174                 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
5175                                    || (*t == '=' && t[1] == '>')))
5176                     OPERATOR(HASHBRACK);
5177                 if (PL_expect == XREF)
5178                     PL_expect = XTERM;
5179                 else {
5180                     PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
5181                     PL_expect = XSTATE;
5182                 }
5183             }
5184             break;
5185         }
5186         pl_yylval.ival = CopLINE(PL_curcop);
5187         if (isSPACE(*s) || *s == '#')
5188             PL_copline = NOLINE;   /* invalidate current command line number */
5189         TOKEN('{');
5190     case '}':
5191       rightbracket:
5192         s++;
5193         if (PL_lex_brackets <= 0)
5194             yyerror("Unmatched right curly bracket");
5195         else
5196             PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
5197         if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
5198             PL_lex_formbrack = 0;
5199         if (PL_lex_state == LEX_INTERPNORMAL) {
5200             if (PL_lex_brackets == 0) {
5201                 if (PL_expect & XFAKEBRACK) {
5202                     PL_expect &= XENUMMASK;
5203                     PL_lex_state = LEX_INTERPEND;
5204                     PL_bufptr = s;
5205 #if 0
5206                     if (PL_madskills) {
5207                         if (!PL_thiswhite)
5208                             PL_thiswhite = newSVpvs("");
5209                         sv_catpvs(PL_thiswhite,"}");
5210                     }
5211 #endif
5212                     return yylex();     /* ignore fake brackets */
5213                 }
5214                 if (*s == '-' && s[1] == '>')
5215                     PL_lex_state = LEX_INTERPENDMAYBE;
5216                 else if (*s != '[' && *s != '{')
5217                     PL_lex_state = LEX_INTERPEND;
5218             }
5219         }
5220         if (PL_expect & XFAKEBRACK) {
5221             PL_expect &= XENUMMASK;
5222             PL_bufptr = s;
5223             return yylex();             /* ignore fake brackets */
5224         }
5225         start_force(PL_curforce);
5226         if (PL_madskills) {
5227             curmad('X', newSVpvn(s-1,1));
5228             CURMAD('_', PL_thiswhite);
5229         }
5230         force_next('}');
5231 #ifdef PERL_MAD
5232         if (!PL_thistoken)
5233             PL_thistoken = newSVpvs("");
5234 #endif
5235         TOKEN(';');
5236     case '&':
5237         s++;
5238         if (*s++ == '&')
5239             AOPERATOR(ANDAND);
5240         s--;
5241         if (PL_expect == XOPERATOR) {
5242             if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
5243                 && isIDFIRST_lazy_if(s,UTF))
5244             {
5245                 CopLINE_dec(PL_curcop);
5246                 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
5247                 CopLINE_inc(PL_curcop);
5248             }
5249             BAop(OP_BIT_AND);
5250         }
5251
5252         s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5253         if (*PL_tokenbuf) {
5254             PL_expect = XOPERATOR;
5255             force_ident(PL_tokenbuf, '&');
5256         }
5257         else
5258             PREREF('&');
5259         pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
5260         TERM('&');
5261
5262     case '|':
5263         s++;
5264         if (*s++ == '|')
5265             AOPERATOR(OROR);
5266         s--;
5267         BOop(OP_BIT_OR);
5268     case '=':
5269         s++;
5270         {
5271             const char tmp = *s++;
5272             if (tmp == '=')
5273                 Eop(OP_EQ);
5274             if (tmp == '>')
5275                 OPERATOR(',');
5276             if (tmp == '~')
5277                 PMop(OP_MATCH);
5278             if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
5279                 && strchr("+-*/%.^&|<",tmp))
5280                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5281                             "Reversed %c= operator",(int)tmp);
5282             s--;
5283             if (PL_expect == XSTATE && isALPHA(tmp) &&
5284                 (s == PL_linestart+1 || s[-2] == '\n') )
5285                 {
5286                     if (PL_in_eval && !PL_rsfp) {
5287                         d = PL_bufend;
5288                         while (s < d) {
5289                             if (*s++ == '\n') {
5290                                 incline(s);
5291                                 if (strnEQ(s,"=cut",4)) {
5292                                     s = strchr(s,'\n');
5293                                     if (s)
5294                                         s++;
5295                                     else
5296                                         s = d;
5297                                     incline(s);
5298                                     goto retry;
5299                                 }
5300                             }
5301                         }
5302                         goto retry;
5303                     }
5304 #ifdef PERL_MAD
5305                     if (PL_madskills) {
5306                         if (!PL_thiswhite)
5307                             PL_thiswhite = newSVpvs("");
5308                         sv_catpvn(PL_thiswhite, PL_linestart,
5309                                   PL_bufend - PL_linestart);
5310                     }
5311 #endif
5312                     s = PL_bufend;
5313                     PL_doextract = TRUE;
5314                     goto retry;
5315                 }
5316         }
5317         if (PL_lex_brackets < PL_lex_formbrack) {
5318             const char *t = s;
5319 #ifdef PERL_STRICT_CR
5320             while (SPACE_OR_TAB(*t))
5321 #else
5322             while (SPACE_OR_TAB(*t) || *t == '\r')
5323 #endif
5324                 t++;
5325             if (*t == '\n' || *t == '#') {
5326                 s--;
5327                 PL_expect = XBLOCK;
5328                 goto leftbracket;
5329             }
5330         }
5331         pl_yylval.ival = 0;
5332         OPERATOR(ASSIGNOP);
5333     case '!':
5334         s++;
5335         {
5336             const char tmp = *s++;
5337             if (tmp == '=') {
5338                 /* was this !=~ where !~ was meant?
5339                  * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
5340
5341                 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
5342                     const char *t = s+1;
5343
5344                     while (t < PL_bufend && isSPACE(*t))
5345                         ++t;
5346
5347                     if (*t == '/' || *t == '?' ||
5348                         ((*t == 'm' || *t == 's' || *t == 'y')
5349                          && !isALNUM(t[1])) ||
5350                         (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
5351                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5352                                     "!=~ should be !~");
5353                 }
5354                 Eop(OP_NE);
5355             }
5356             if (tmp == '~')
5357                 PMop(OP_NOT);
5358         }
5359         s--;
5360         OPERATOR('!');
5361     case '<':
5362         if (PL_expect != XOPERATOR) {
5363             if (s[1] != '<' && !strchr(s,'>'))
5364                 check_uni();
5365             if (s[1] == '<')
5366                 s = scan_heredoc(s);
5367             else
5368                 s = scan_inputsymbol(s);
5369             TERM(sublex_start());
5370         }
5371         s++;
5372         {
5373             char tmp = *s++;
5374             if (tmp == '<')
5375                 SHop(OP_LEFT_SHIFT);
5376             if (tmp == '=') {
5377                 tmp = *s++;
5378                 if (tmp == '>')
5379                     Eop(OP_NCMP);
5380                 s--;
5381                 Rop(OP_LE);
5382             }
5383         }
5384         s--;
5385         Rop(OP_LT);
5386     case '>':
5387         s++;
5388         {
5389             const char tmp = *s++;
5390             if (tmp == '>')
5391                 SHop(OP_RIGHT_SHIFT);
5392             else if (tmp == '=')
5393                 Rop(OP_GE);
5394         }
5395         s--;
5396         Rop(OP_GT);
5397
5398     case '$':
5399         CLINE;
5400
5401         if (PL_expect == XOPERATOR) {
5402             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5403                 return deprecate_commaless_var_list();
5404             }
5405         }
5406
5407         if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
5408             PL_tokenbuf[0] = '@';
5409             s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
5410                            sizeof PL_tokenbuf - 1, FALSE);
5411             if (PL_expect == XOPERATOR)
5412                 no_op("Array length", s);
5413             if (!PL_tokenbuf[1])
5414                 PREREF(DOLSHARP);
5415             PL_expect = XOPERATOR;
5416             PL_pending_ident = '#';
5417             TOKEN(DOLSHARP);
5418         }
5419
5420         PL_tokenbuf[0] = '$';
5421         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5422                        sizeof PL_tokenbuf - 1, FALSE);
5423         if (PL_expect == XOPERATOR)
5424             no_op("Scalar", s);
5425         if (!PL_tokenbuf[1]) {
5426             if (s == PL_bufend)
5427                 yyerror("Final $ should be \\$ or $name");
5428             PREREF('$');
5429         }
5430
5431         /* This kludge not intended to be bulletproof. */
5432         if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
5433             pl_yylval.opval = newSVOP(OP_CONST, 0,
5434                                    newSViv(CopARYBASE_get(&PL_compiling)));
5435             pl_yylval.opval->op_private = OPpCONST_ARYBASE;
5436             TERM(THING);
5437         }
5438
5439         d = s;
5440         {
5441             const char tmp = *s;
5442             if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
5443                 s = SKIPSPACE1(s);
5444
5445             if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
5446                 && intuit_more(s)) {
5447                 if (*s == '[') {
5448                     PL_tokenbuf[0] = '@';
5449                     if (ckWARN(WARN_SYNTAX)) {
5450                         char *t = s+1;
5451
5452                         while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
5453                             t++;
5454                         if (*t++ == ',') {
5455                             PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
5456                             while (t < PL_bufend && *t != ']')
5457                                 t++;
5458                             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5459                                         "Multidimensional syntax %.*s not supported",
5460                                     (int)((t - PL_bufptr) + 1), PL_bufptr);
5461                         }
5462                     }
5463                 }
5464                 else if (*s == '{') {
5465                     char *t;
5466                     PL_tokenbuf[0] = '%';
5467                     if (strEQ(PL_tokenbuf+1, "SIG")  && ckWARN(WARN_SYNTAX)
5468                         && (t = strchr(s, '}')) && (t = strchr(t, '=')))
5469                         {
5470                             char tmpbuf[sizeof PL_tokenbuf];
5471                             do {
5472                                 t++;
5473                             } while (isSPACE(*t));
5474                             if (isIDFIRST_lazy_if(t,UTF)) {
5475                                 STRLEN len;
5476                                 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
5477                                               &len);
5478                                 while (isSPACE(*t))
5479                                     t++;
5480                                 if (*t == ';' && get_cvn_flags(tmpbuf, len, 0))
5481                                     Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5482                                                 "You need to quote \"%s\"",
5483                                                 tmpbuf);
5484                             }
5485                         }
5486                 }
5487             }
5488
5489             PL_expect = XOPERATOR;
5490             if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
5491                 const bool islop = (PL_last_lop == PL_oldoldbufptr);
5492                 if (!islop || PL_last_lop_op == OP_GREPSTART)
5493                     PL_expect = XOPERATOR;
5494                 else if (strchr("$@\"'`q", *s))
5495                     PL_expect = XTERM;          /* e.g. print $fh "foo" */
5496                 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
5497                     PL_expect = XTERM;          /* e.g. print $fh &sub */
5498                 else if (isIDFIRST_lazy_if(s,UTF)) {
5499                     char tmpbuf[sizeof PL_tokenbuf];
5500                     int t2;
5501                     scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5502                     if ((t2 = keyword(tmpbuf, len, 0))) {
5503                         /* binary operators exclude handle interpretations */
5504                         switch (t2) {
5505                         case -KEY_x:
5506                         case -KEY_eq:
5507                         case -KEY_ne:
5508                         case -KEY_gt:
5509                         case -KEY_lt:
5510                         case -KEY_ge:
5511                         case -KEY_le:
5512                         case -KEY_cmp:
5513                             break;
5514                         default:
5515                             PL_expect = XTERM;  /* e.g. print $fh length() */
5516                             break;
5517                         }
5518                     }
5519                     else {
5520                         PL_expect = XTERM;      /* e.g. print $fh subr() */
5521                     }
5522                 }
5523                 else if (isDIGIT(*s))
5524                     PL_expect = XTERM;          /* e.g. print $fh 3 */
5525                 else if (*s == '.' && isDIGIT(s[1]))
5526                     PL_expect = XTERM;          /* e.g. print $fh .3 */
5527                 else if ((*s == '?' || *s == '-' || *s == '+')
5528                          && !isSPACE(s[1]) && s[1] != '=')
5529                     PL_expect = XTERM;          /* e.g. print $fh -1 */
5530                 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
5531                          && s[1] != '/')
5532                     PL_expect = XTERM;          /* e.g. print $fh /.../
5533                                                    XXX except DORDOR operator
5534                                                 */
5535                 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
5536                          && s[2] != '=')
5537                     PL_expect = XTERM;          /* print $fh <<"EOF" */
5538             }
5539         }
5540         PL_pending_ident = '$';
5541         TOKEN('$');
5542
5543     case '@':
5544         if (PL_expect == XOPERATOR)
5545             no_op("Array", s);
5546         PL_tokenbuf[0] = '@';
5547         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
5548         if (!PL_tokenbuf[1]) {
5549             PREREF('@');
5550         }
5551         if (PL_lex_state == LEX_NORMAL)
5552             s = SKIPSPACE1(s);
5553         if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
5554             if (*s == '{')
5555                 PL_tokenbuf[0] = '%';
5556
5557             /* Warn about @ where they meant $. */
5558             if (*s == '[' || *s == '{') {
5559                 if (ckWARN(WARN_SYNTAX)) {
5560                     const char *t = s + 1;
5561                     while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
5562                         t++;
5563                     if (*t == '}' || *t == ']') {
5564                         t++;
5565                         PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
5566                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5567                             "Scalar value %.*s better written as $%.*s",
5568                             (int)(t-PL_bufptr), PL_bufptr,
5569                             (int)(t-PL_bufptr-1), PL_bufptr+1);
5570                     }
5571                 }
5572             }
5573         }
5574         PL_pending_ident = '@';
5575         TERM('@');
5576
5577      case '/':                  /* may be division, defined-or, or pattern */
5578         if (PL_expect == XTERMORDORDOR && s[1] == '/') {
5579             s += 2;
5580             AOPERATOR(DORDOR);
5581         }
5582      case '?':                  /* may either be conditional or pattern */
5583         if (PL_expect == XOPERATOR) {
5584              char tmp = *s++;
5585              if(tmp == '?') {
5586                 OPERATOR('?');
5587              }
5588              else {
5589                  tmp = *s++;
5590                  if(tmp == '/') {
5591                      /* A // operator. */
5592                     AOPERATOR(DORDOR);
5593                  }
5594                  else {
5595                      s--;
5596                      Mop(OP_DIVIDE);
5597                  }
5598              }
5599          }
5600          else {
5601              /* Disable warning on "study /blah/" */
5602              if (PL_oldoldbufptr == PL_last_uni
5603               && (*PL_last_uni != 's' || s - PL_last_uni < 5
5604                   || memNE(PL_last_uni, "study", 5)
5605                   || isALNUM_lazy_if(PL_last_uni+5,UTF)
5606               ))
5607                  check_uni();
5608              s = scan_pat(s,OP_MATCH);
5609              TERM(sublex_start());
5610          }
5611
5612     case '.':
5613         if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
5614 #ifdef PERL_STRICT_CR
5615             && s[1] == '\n'
5616 #else
5617             && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
5618 #endif
5619             && (s == PL_linestart || s[-1] == '\n') )
5620         {
5621             PL_lex_formbrack = 0;
5622             PL_expect = XSTATE;
5623             goto rightbracket;
5624         }
5625         if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
5626             s += 3;
5627             OPERATOR(YADAYADA);
5628         }
5629         if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
5630             char tmp = *s++;
5631             if (*s == tmp) {
5632                 s++;
5633                 if (*s == tmp) {
5634                     s++;
5635                     pl_yylval.ival = OPf_SPECIAL;
5636                 }
5637                 else
5638                     pl_yylval.ival = 0;
5639                 OPERATOR(DOTDOT);
5640             }
5641             Aop(OP_CONCAT);
5642         }
5643         /* FALL THROUGH */
5644     case '0': case '1': case '2': case '3': case '4':
5645     case '5': case '6': case '7': case '8': case '9':
5646         s = scan_num(s, &pl_yylval);
5647         DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
5648         if (PL_expect == XOPERATOR)
5649             no_op("Number",s);
5650         TERM(THING);
5651
5652     case '\'':
5653         s = scan_str(s,!!PL_madskills,FALSE);
5654         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
5655         if (PL_expect == XOPERATOR) {
5656             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5657                 return deprecate_commaless_var_list();
5658             }
5659             else
5660                 no_op("String",s);
5661         }
5662         if (!s)
5663             missingterm(NULL);
5664         pl_yylval.ival = OP_CONST;
5665         TERM(sublex_start());
5666
5667     case '"':
5668         s = scan_str(s,!!PL_madskills,FALSE);
5669         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
5670         if (PL_expect == XOPERATOR) {
5671             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5672                 return deprecate_commaless_var_list();
5673             }
5674             else
5675                 no_op("String",s);
5676         }
5677         if (!s)
5678             missingterm(NULL);
5679         pl_yylval.ival = OP_CONST;
5680         /* FIXME. I think that this can be const if char *d is replaced by
5681            more localised variables.  */
5682         for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
5683             if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
5684                 pl_yylval.ival = OP_STRINGIFY;
5685                 break;
5686             }
5687         }
5688         TERM(sublex_start());
5689
5690     case '`':
5691         s = scan_str(s,!!PL_madskills,FALSE);
5692         DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
5693         if (PL_expect == XOPERATOR)
5694             no_op("Backticks",s);
5695         if (!s)
5696             missingterm(NULL);
5697         readpipe_override();
5698         TERM(sublex_start());
5699
5700     case '\\':
5701         s++;
5702         if (PL_lex_inwhat && isDIGIT(*s))
5703             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
5704                            *s, *s);
5705         if (PL_expect == XOPERATOR)
5706             no_op("Backslash",s);
5707         OPERATOR(REFGEN);
5708
5709     case 'v':
5710         if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
5711             char *start = s + 2;
5712             while (isDIGIT(*start) || *start == '_')
5713                 start++;
5714             if (*start == '.' && isDIGIT(start[1])) {
5715                 s = scan_num(s, &pl_yylval);
5716                 TERM(THING);
5717             }
5718             /* avoid v123abc() or $h{v1}, allow C<print v10;> */
5719             else if (!isALPHA(*start) && (PL_expect == XTERM
5720                         || PL_expect == XREF || PL_expect == XSTATE
5721                         || PL_expect == XTERMORDORDOR)) {
5722                 GV *const gv = gv_fetchpvn_flags(s, start - s, 0, SVt_PVCV);
5723                 if (!gv) {
5724                     s = scan_num(s, &pl_yylval);
5725                     TERM(THING);
5726                 }
5727             }
5728         }
5729         goto keylookup;
5730     case 'x':
5731         if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
5732             s++;
5733             Mop(OP_REPEAT);
5734         }
5735         goto keylookup;
5736
5737     case '_':
5738     case 'a': case 'A':
5739     case 'b': case 'B':
5740     case 'c': case 'C':
5741     case 'd': case 'D':
5742     case 'e': case 'E':
5743     case 'f': case 'F':
5744     case 'g': case 'G':
5745     case 'h': case 'H':
5746     case 'i': case 'I':
5747     case 'j': case 'J':
5748     case 'k': case 'K':
5749     case 'l': case 'L':
5750     case 'm': case 'M':
5751     case 'n': case 'N':
5752     case 'o': case 'O':
5753     case 'p': case 'P':
5754     case 'q': case 'Q':
5755     case 'r': case 'R':
5756     case 's': case 'S':
5757     case 't': case 'T':
5758     case 'u': case 'U':
5759               case 'V':
5760     case 'w': case 'W':
5761               case 'X':
5762     case 'y': case 'Y':
5763     case 'z': case 'Z':
5764
5765       keylookup: {
5766         bool anydelim;
5767         I32 tmp;
5768
5769         orig_keyword = 0;
5770         gv = NULL;
5771         gvp = NULL;
5772
5773         PL_bufptr = s;
5774         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5775
5776         /* Some keywords can be followed by any delimiter, including ':' */
5777         anydelim = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) ||
5778                (len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
5779                              (PL_tokenbuf[0] == 'q' &&
5780                               strchr("qwxr", PL_tokenbuf[1])))));
5781
5782         /* x::* is just a word, unless x is "CORE" */
5783         if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
5784             goto just_a_word;
5785
5786         d = s;
5787         while (d < PL_bufend && isSPACE(*d))
5788                 d++;    /* no comments skipped here, or s### is misparsed */
5789
5790         /* Is this a word before a => operator? */
5791         if (*d == '=' && d[1] == '>') {
5792             CLINE;
5793             pl_yylval.opval
5794                 = (OP*)newSVOP(OP_CONST, 0,
5795                                S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
5796             pl_yylval.opval->op_private = OPpCONST_BARE;
5797             TERM(WORD);
5798         }
5799
5800         /* Check for plugged-in keyword */
5801         {
5802             OP *o;
5803             int result;
5804             char *saved_bufptr = PL_bufptr;
5805             PL_bufptr = s;
5806             result = CALL_FPTR(PL_keyword_plugin)(aTHX_ PL_tokenbuf, len, &o);
5807             s = PL_bufptr;
5808             if (result == KEYWORD_PLUGIN_DECLINE) {
5809                 /* not a plugged-in keyword */
5810                 PL_bufptr = saved_bufptr;
5811             } else if (result == KEYWORD_PLUGIN_STMT) {
5812                 pl_yylval.opval = o;
5813                 CLINE;
5814                 PL_expect = XSTATE;
5815                 return REPORT(PLUGSTMT);
5816             } else if (result == KEYWORD_PLUGIN_EXPR) {
5817                 pl_yylval.opval = o;
5818                 CLINE;
5819                 PL_expect = XOPERATOR;
5820                 return REPORT(PLUGEXPR);
5821             } else {
5822                 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
5823                                         PL_tokenbuf);
5824             }
5825         }
5826
5827         /* Check for built-in keyword */
5828         tmp = keyword(PL_tokenbuf, len, 0);
5829
5830         /* Is this a label? */
5831         if (!anydelim && PL_expect == XSTATE
5832               && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
5833             if (tmp)
5834                 Perl_croak(aTHX_ "Can't use keyword '%s' as a label", PL_tokenbuf);
5835             s = d + 1;
5836             pl_yylval.pval = CopLABEL_alloc(PL_tokenbuf);
5837             CLINE;
5838             TOKEN(LABEL);
5839         }
5840
5841         if (tmp < 0) {                  /* second-class keyword? */
5842             GV *ogv = NULL;     /* override (winner) */
5843             GV *hgv = NULL;     /* hidden (loser) */
5844             if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
5845                 CV *cv;
5846                 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVCV)) &&
5847                     (cv = GvCVu(gv)))
5848                 {
5849                     if (GvIMPORTED_CV(gv))
5850                         ogv = gv;
5851                     else if (! CvMETHOD(cv))
5852                         hgv = gv;
5853                 }
5854                 if (!ogv &&
5855                     (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
5856                     (gv = *gvp) && isGV_with_GP(gv) &&
5857                     GvCVu(gv) && GvIMPORTED_CV(gv))
5858                 {
5859                     ogv = gv;
5860                 }
5861             }
5862             if (ogv) {
5863                 orig_keyword = tmp;
5864                 tmp = 0;                /* overridden by import or by GLOBAL */
5865             }
5866             else if (gv && !gvp
5867                      && -tmp==KEY_lock  /* XXX generalizable kludge */
5868                      && GvCVu(gv))
5869             {
5870                 tmp = 0;                /* any sub overrides "weak" keyword */
5871             }
5872             else {                      /* no override */
5873                 tmp = -tmp;
5874                 if (tmp == KEY_dump) {
5875                     Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
5876                                    "dump() better written as CORE::dump()");
5877                 }
5878                 gv = NULL;
5879                 gvp = 0;
5880                 if (hgv && tmp != KEY_x && tmp != KEY_CORE)     /* never ambiguous */
5881                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5882                                    "Ambiguous call resolved as CORE::%s(), %s",
5883                                    GvENAME(hgv), "qualify as such or use &");
5884             }
5885         }
5886
5887       reserved_word:
5888         switch (tmp) {
5889
5890         default:                        /* not a keyword */
5891             /* Trade off - by using this evil construction we can pull the
5892                variable gv into the block labelled keylookup. If not, then
5893                we have to give it function scope so that the goto from the
5894                earlier ':' case doesn't bypass the initialisation.  */
5895             if (0) {
5896             just_a_word_zero_gv:
5897                 gv = NULL;
5898                 gvp = NULL;
5899                 orig_keyword = 0;
5900             }
5901           just_a_word: {
5902                 SV *sv;
5903                 int pkgname = 0;
5904                 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
5905                 OP *rv2cv_op;
5906                 CV *cv;
5907 #ifdef PERL_MAD
5908                 SV *nextPL_nextwhite = 0;
5909 #endif
5910
5911
5912                 /* Get the rest if it looks like a package qualifier */
5913
5914                 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
5915                     STRLEN morelen;
5916                     s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
5917                                   TRUE, &morelen);
5918                     if (!morelen)
5919                         Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
5920                                 *s == '\'' ? "'" : "::");
5921                     len += morelen;
5922                     pkgname = 1;
5923                 }
5924
5925                 if (PL_expect == XOPERATOR) {
5926                     if (PL_bufptr == PL_linestart) {
5927                         CopLINE_dec(PL_curcop);
5928                         Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
5929                         CopLINE_inc(PL_curcop);
5930                     }
5931                     else
5932                         no_op("Bareword",s);
5933                 }
5934
5935                 /* Look for a subroutine with this name in current package,
5936                    unless name is "Foo::", in which case Foo is a bearword
5937                    (and a package name). */
5938
5939                 if (len > 2 && !PL_madskills &&
5940                     PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
5941                 {
5942                     if (ckWARN(WARN_BAREWORD)
5943                         && ! gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVHV))
5944                         Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
5945                             "Bareword \"%s\" refers to nonexistent package",
5946                              PL_tokenbuf);
5947                     len -= 2;
5948                     PL_tokenbuf[len] = '\0';
5949                     gv = NULL;
5950                     gvp = 0;
5951                 }
5952                 else {
5953                     if (!gv) {
5954                         /* Mustn't actually add anything to a symbol table.
5955                            But also don't want to "initialise" any placeholder
5956                            constants that might already be there into full
5957                            blown PVGVs with attached PVCV.  */
5958                         gv = gv_fetchpvn_flags(PL_tokenbuf, len,
5959                                                GV_NOADD_NOINIT, SVt_PVCV);
5960                     }
5961                     len = 0;
5962                 }
5963
5964                 /* if we saw a global override before, get the right name */
5965
5966                 if (gvp) {
5967                     sv = newSVpvs("CORE::GLOBAL::");
5968                     sv_catpv(sv,PL_tokenbuf);
5969                 }
5970                 else {
5971                     /* If len is 0, newSVpv does strlen(), which is correct.
5972                        If len is non-zero, then it will be the true length,
5973                        and so the scalar will be created correctly.  */
5974                     sv = newSVpv(PL_tokenbuf,len);
5975                 }
5976 #ifdef PERL_MAD
5977                 if (PL_madskills && !PL_thistoken) {
5978                     char *start = SvPVX(PL_linestr) + PL_realtokenstart;
5979                     PL_thistoken = newSVpvn(start,s - start);
5980                     PL_realtokenstart = s - SvPVX(PL_linestr);
5981                 }
5982 #endif
5983
5984                 /* Presume this is going to be a bareword of some sort. */
5985
5986                 CLINE;
5987                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
5988                 pl_yylval.opval->op_private = OPpCONST_BARE;
5989                 /* UTF-8 package name? */
5990                 if (UTF && !IN_BYTES &&
5991                     is_utf8_string((U8*)SvPVX_const(sv), SvCUR(sv)))
5992                     SvUTF8_on(sv);
5993
5994                 /* And if "Foo::", then that's what it certainly is. */
5995
5996                 if (len)
5997                     goto safe_bareword;
5998
5999                 cv = NULL;
6000                 {
6001                     OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc(sv));
6002                     const_op->op_private = OPpCONST_BARE;
6003                     rv2cv_op = newCVREF(0, const_op);
6004                 }
6005                 if (rv2cv_op->op_type == OP_RV2CV &&
6006                         (rv2cv_op->op_flags & OPf_KIDS)) {
6007                     OP *rv_op = cUNOPx(rv2cv_op)->op_first;
6008                     switch (rv_op->op_type) {
6009                         case OP_CONST: {
6010                             SV *sv = cSVOPx_sv(rv_op);
6011                             if (SvROK(sv) && SvTYPE(SvRV(sv)) == SVt_PVCV)
6012                                 cv = (CV*)SvRV(sv);
6013                         } break;
6014                         case OP_GV: {
6015                             GV *gv = cGVOPx_gv(rv_op);
6016                             CV *maybe_cv = GvCVu(gv);
6017                             if (maybe_cv && SvTYPE((SV*)maybe_cv) == SVt_PVCV)
6018                                 cv = maybe_cv;
6019                         } break;
6020                     }
6021                 }
6022
6023                 /* See if it's the indirect object for a list operator. */
6024
6025                 if (PL_oldoldbufptr &&
6026                     PL_oldoldbufptr < PL_bufptr &&
6027                     (PL_oldoldbufptr == PL_last_lop
6028                      || PL_oldoldbufptr == PL_last_uni) &&
6029                     /* NO SKIPSPACE BEFORE HERE! */
6030                     (PL_expect == XREF ||
6031                      ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
6032                 {
6033                     bool immediate_paren = *s == '(';
6034
6035                     /* (Now we can afford to cross potential line boundary.) */
6036                     s = SKIPSPACE2(s,nextPL_nextwhite);
6037 #ifdef PERL_MAD
6038                     PL_nextwhite = nextPL_nextwhite;    /* assume no & deception */
6039 #endif
6040
6041                     /* Two barewords in a row may indicate method call. */
6042
6043                     if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
6044                         (tmp = intuit_method(s, gv, cv))) {
6045                         op_free(rv2cv_op);
6046                         return REPORT(tmp);
6047                     }
6048
6049                     /* If not a declared subroutine, it's an indirect object. */
6050                     /* (But it's an indir obj regardless for sort.) */
6051                     /* Also, if "_" follows a filetest operator, it's a bareword */
6052
6053                     if (
6054                         ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
6055                          (!cv &&
6056                         (PL_last_lop_op != OP_MAPSTART &&
6057                          PL_last_lop_op != OP_GREPSTART))))
6058                        || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
6059                             && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
6060                        )
6061                     {
6062                         PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
6063                         goto bareword;
6064                     }
6065                 }
6066
6067                 PL_expect = XOPERATOR;
6068 #ifdef PERL_MAD
6069                 if (isSPACE(*s))
6070                     s = SKIPSPACE2(s,nextPL_nextwhite);
6071                 PL_nextwhite = nextPL_nextwhite;
6072 #else
6073                 s = skipspace(s);
6074 #endif
6075
6076                 /* Is this a word before a => operator? */
6077                 if (*s == '=' && s[1] == '>' && !pkgname) {
6078                     op_free(rv2cv_op);
6079                     CLINE;
6080                     sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
6081                     if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
6082                       SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
6083                     TERM(WORD);
6084                 }
6085
6086                 /* If followed by a paren, it's certainly a subroutine. */
6087                 if (*s == '(') {
6088                     CLINE;
6089                     if (cv) {
6090                         d = s + 1;
6091                         while (SPACE_OR_TAB(*d))
6092                             d++;
6093                         if (*d == ')' && (sv = cv_const_sv(cv))) {
6094                             s = d + 1;
6095                             goto its_constant;
6096                         }
6097                     }
6098 #ifdef PERL_MAD
6099                     if (PL_madskills) {
6100                         PL_nextwhite = PL_thiswhite;
6101                         PL_thiswhite = 0;
6102                     }
6103                     start_force(PL_curforce);
6104 #endif
6105                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6106                     PL_expect = XOPERATOR;
6107 #ifdef PERL_MAD
6108                     if (PL_madskills) {
6109                         PL_nextwhite = nextPL_nextwhite;
6110                         curmad('X', PL_thistoken);
6111                         PL_thistoken = newSVpvs("");
6112                     }
6113 #endif
6114                     op_free(rv2cv_op);
6115                     force_next(WORD);
6116                     pl_yylval.ival = 0;
6117                     TOKEN('&');
6118                 }
6119
6120                 /* If followed by var or block, call it a method (unless sub) */
6121
6122                 if ((*s == '$' || *s == '{') && !cv) {
6123                     op_free(rv2cv_op);
6124                     PL_last_lop = PL_oldbufptr;
6125                     PL_last_lop_op = OP_METHOD;
6126                     PREBLOCK(METHOD);
6127                 }
6128
6129                 /* If followed by a bareword, see if it looks like indir obj. */
6130
6131                 if (!orig_keyword
6132                         && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
6133                         && (tmp = intuit_method(s, gv, cv))) {
6134                     op_free(rv2cv_op);
6135                     return REPORT(tmp);
6136                 }
6137
6138                 /* Not a method, so call it a subroutine (if defined) */
6139
6140                 if (cv) {
6141                     if (lastchar == '-')
6142                         Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6143                                          "Ambiguous use of -%s resolved as -&%s()",
6144                                          PL_tokenbuf, PL_tokenbuf);
6145                     /* Check for a constant sub */
6146                     if ((sv = cv_const_sv(cv))) {
6147                   its_constant:
6148                         op_free(rv2cv_op);
6149                         SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
6150                         ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
6151                         pl_yylval.opval->op_private = 0;
6152                         TOKEN(WORD);
6153                     }
6154
6155                     op_free(pl_yylval.opval);
6156                     pl_yylval.opval = rv2cv_op;
6157                     pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
6158                     PL_last_lop = PL_oldbufptr;
6159                     PL_last_lop_op = OP_ENTERSUB;
6160                     /* Is there a prototype? */
6161                     if (
6162 #ifdef PERL_MAD
6163                         cv &&
6164 #endif
6165                         SvPOK(cv))
6166                     {
6167                         STRLEN protolen;
6168                         const char *proto = SvPV_const(MUTABLE_SV(cv), protolen);
6169                         if (!protolen)
6170                             TERM(FUNC0SUB);
6171                         if ((*proto == '$' || *proto == '_') && proto[1] == '\0')
6172                             OPERATOR(UNIOPSUB);
6173                         while (*proto == ';')
6174                             proto++;
6175                         if (*proto == '&' && *s == '{') {
6176                             if (PL_curstash)
6177                                 sv_setpvs(PL_subname, "__ANON__");
6178                             else
6179                                 sv_setpvs(PL_subname, "__ANON__::__ANON__");
6180                             PREBLOCK(LSTOPSUB);
6181                         }
6182                     }
6183 #ifdef PERL_MAD
6184                     {
6185                         if (PL_madskills) {
6186                             PL_nextwhite = PL_thiswhite;
6187                             PL_thiswhite = 0;
6188                         }
6189                         start_force(PL_curforce);
6190                         NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6191                         PL_expect = XTERM;
6192                         if (PL_madskills) {
6193                             PL_nextwhite = nextPL_nextwhite;
6194                             curmad('X', PL_thistoken);
6195                             PL_thistoken = newSVpvs("");
6196                         }
6197                         force_next(WORD);
6198                         TOKEN(NOAMP);
6199                     }
6200                 }
6201
6202                 /* Guess harder when madskills require "best effort". */
6203                 if (PL_madskills && (!gv || !GvCVu(gv))) {
6204                     int probable_sub = 0;
6205                     if (strchr("\"'`$@%0123456789!*+{[<", *s))
6206                         probable_sub = 1;
6207                     else if (isALPHA(*s)) {
6208                         char tmpbuf[1024];
6209                         STRLEN tmplen;
6210                         d = s;
6211                         d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
6212                         if (!keyword(tmpbuf, tmplen, 0))
6213                             probable_sub = 1;
6214                         else {
6215                             while (d < PL_bufend && isSPACE(*d))
6216                                 d++;
6217                             if (*d == '=' && d[1] == '>')
6218                                 probable_sub = 1;
6219                         }
6220                     }
6221                     if (probable_sub) {
6222                         gv = gv_fetchpv(PL_tokenbuf, GV_ADD, SVt_PVCV);
6223                         op_free(pl_yylval.opval);
6224                         pl_yylval.opval = rv2cv_op;
6225                         pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
6226                         PL_last_lop = PL_oldbufptr;
6227                         PL_last_lop_op = OP_ENTERSUB;
6228                         PL_nextwhite = PL_thiswhite;
6229                         PL_thiswhite = 0;
6230                         start_force(PL_curforce);
6231                         NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6232                         PL_expect = XTERM;
6233                         PL_nextwhite = nextPL_nextwhite;
6234                         curmad('X', PL_thistoken);
6235                         PL_thistoken = newSVpvs("");
6236                         force_next(WORD);
6237                         TOKEN(NOAMP);
6238                     }
6239 #else
6240                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6241                     PL_expect = XTERM;
6242                     force_next(WORD);
6243                     TOKEN(NOAMP);
6244 #endif
6245                 }
6246
6247                 /* Call it a bare word */
6248
6249                 if (PL_hints & HINT_STRICT_SUBS)
6250                     pl_yylval.opval->op_private |= OPpCONST_STRICT;
6251                 else {
6252                 bareword:
6253                     /* after "print" and similar functions (corresponding to
6254                      * "F? L" in opcode.pl), whatever wasn't already parsed as
6255                      * a filehandle should be subject to "strict subs".
6256                      * Likewise for the optional indirect-object argument to system
6257                      * or exec, which can't be a bareword */
6258                     if ((PL_last_lop_op == OP_PRINT
6259                             || PL_last_lop_op == OP_PRTF
6260                             || PL_last_lop_op == OP_SAY
6261                             || PL_last_lop_op == OP_SYSTEM
6262                             || PL_last_lop_op == OP_EXEC)
6263                             && (PL_hints & HINT_STRICT_SUBS))
6264                         pl_yylval.opval->op_private |= OPpCONST_STRICT;
6265                     if (lastchar != '-') {
6266                         if (ckWARN(WARN_RESERVED)) {
6267                             d = PL_tokenbuf;
6268                             while (isLOWER(*d))
6269                                 d++;
6270                             if (!*d && !gv_stashpv(PL_tokenbuf, 0))
6271                                 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
6272                                        PL_tokenbuf);
6273                         }
6274                     }
6275                 }
6276                 op_free(rv2cv_op);
6277
6278             safe_bareword:
6279                 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
6280                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6281                                      "Operator or semicolon missing before %c%s",
6282                                      lastchar, PL_tokenbuf);
6283                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6284                                      "Ambiguous use of %c resolved as operator %c",
6285                                      lastchar, lastchar);
6286                 }
6287                 TOKEN(WORD);
6288             }
6289
6290         case KEY___FILE__:
6291             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6292                                         newSVpv(CopFILE(PL_curcop),0));
6293             TERM(THING);
6294
6295         case KEY___LINE__:
6296             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6297                                     Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
6298             TERM(THING);
6299
6300         case KEY___PACKAGE__:
6301             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6302                                         (PL_curstash
6303                                          ? newSVhek(HvNAME_HEK(PL_curstash))
6304                                          : &PL_sv_undef));
6305             TERM(THING);
6306
6307         case KEY___DATA__:
6308         case KEY___END__: {
6309             GV *gv;
6310             if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
6311                 const char *pname = "main";
6312                 if (PL_tokenbuf[2] == 'D')
6313                     pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
6314                 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), GV_ADD,
6315                                 SVt_PVIO);
6316                 GvMULTI_on(gv);
6317                 if (!GvIO(gv))
6318                     GvIOp(gv) = newIO();
6319                 IoIFP(GvIOp(gv)) = PL_rsfp;
6320 #if defined(HAS_FCNTL) && defined(F_SETFD)
6321                 {
6322                     const int fd = PerlIO_fileno(PL_rsfp);
6323                     fcntl(fd,F_SETFD,fd >= 3);
6324                 }
6325 #endif
6326                 /* Mark this internal pseudo-handle as clean */
6327                 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
6328                 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
6329                     IoTYPE(GvIOp(gv)) = IoTYPE_STD;
6330                 else
6331                     IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
6332 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
6333                 /* if the script was opened in binmode, we need to revert
6334                  * it to text mode for compatibility; but only iff it has CRs
6335                  * XXX this is a questionable hack at best. */
6336                 if (PL_bufend-PL_bufptr > 2
6337                     && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
6338                 {
6339                     Off_t loc = 0;
6340                     if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
6341                         loc = PerlIO_tell(PL_rsfp);
6342                         (void)PerlIO_seek(PL_rsfp, 0L, 0);
6343                     }
6344 #ifdef NETWARE
6345                         if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
6346 #else
6347                     if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
6348 #endif  /* NETWARE */
6349 #ifdef PERLIO_IS_STDIO /* really? */
6350 #  if defined(__BORLANDC__)
6351                         /* XXX see note in do_binmode() */
6352                         ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
6353 #  endif
6354 #endif
6355                         if (loc > 0)
6356                             PerlIO_seek(PL_rsfp, loc, 0);
6357                     }
6358                 }
6359 #endif
6360 #ifdef PERLIO_LAYERS
6361                 if (!IN_BYTES) {
6362                     if (UTF)
6363                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
6364                     else if (PL_encoding) {
6365                         SV *name;
6366                         dSP;
6367                         ENTER;
6368                         SAVETMPS;
6369                         PUSHMARK(sp);
6370                         EXTEND(SP, 1);
6371                         XPUSHs(PL_encoding);
6372                         PUTBACK;
6373                         call_method("name", G_SCALAR);
6374                         SPAGAIN;
6375                         name = POPs;
6376                         PUTBACK;
6377                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
6378                                             Perl_form(aTHX_ ":encoding(%"SVf")",
6379                                                       SVfARG(name)));
6380                         FREETMPS;
6381                         LEAVE;
6382                     }
6383                 }
6384 #endif
6385 #ifdef PERL_MAD
6386                 if (PL_madskills) {
6387                     if (PL_realtokenstart >= 0) {
6388                         char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
6389                         if (!PL_endwhite)
6390                             PL_endwhite = newSVpvs("");
6391                         sv_catsv(PL_endwhite, PL_thiswhite);
6392                         PL_thiswhite = 0;
6393                         sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
6394                         PL_realtokenstart = -1;
6395                     }
6396                     while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
6397                            != NULL) ;
6398                 }
6399 #endif
6400                 PL_rsfp = NULL;
6401             }
6402             goto fake_eof;
6403         }
6404
6405         case KEY_AUTOLOAD:
6406         case KEY_DESTROY:
6407         case KEY_BEGIN:
6408         case KEY_UNITCHECK:
6409         case KEY_CHECK:
6410         case KEY_INIT:
6411         case KEY_END:
6412             if (PL_expect == XSTATE) {
6413                 s = PL_bufptr;
6414                 goto really_sub;
6415             }
6416             goto just_a_word;
6417
6418         case KEY_CORE:
6419             if (*s == ':' && s[1] == ':') {
6420                 s += 2;
6421                 d = s;
6422                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6423                 if (!(tmp = keyword(PL_tokenbuf, len, 0)))
6424                     Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
6425                 if (tmp < 0)
6426                     tmp = -tmp;
6427                 else if (tmp == KEY_require || tmp == KEY_do)
6428                     /* that's a way to remember we saw "CORE::" */
6429                     orig_keyword = tmp;
6430                 goto reserved_word;
6431             }
6432             goto just_a_word;
6433
6434         case KEY_abs:
6435             UNI(OP_ABS);
6436
6437         case KEY_alarm:
6438             UNI(OP_ALARM);
6439
6440         case KEY_accept:
6441             LOP(OP_ACCEPT,XTERM);
6442
6443         case KEY_and:
6444             OPERATOR(ANDOP);
6445
6446         case KEY_atan2:
6447             LOP(OP_ATAN2,XTERM);
6448
6449         case KEY_bind:
6450             LOP(OP_BIND,XTERM);
6451
6452         case KEY_binmode:
6453             LOP(OP_BINMODE,XTERM);
6454
6455         case KEY_bless:
6456             LOP(OP_BLESS,XTERM);
6457
6458         case KEY_break:
6459             FUN0(OP_BREAK);
6460
6461         case KEY_chop:
6462             UNI(OP_CHOP);
6463
6464         case KEY_continue:
6465             /* When 'use switch' is in effect, continue has a dual
6466                life as a control operator. */
6467             {
6468                 if (!FEATURE_IS_ENABLED("switch"))
6469                     PREBLOCK(CONTINUE);
6470                 else {
6471                     /* We have to disambiguate the two senses of
6472                       "continue". If the next token is a '{' then
6473                       treat it as the start of a continue block;
6474                       otherwise treat it as a control operator.
6475                      */
6476                     s = skipspace(s);
6477                     if (*s == '{')
6478             PREBLOCK(CONTINUE);
6479                     else
6480                         FUN0(OP_CONTINUE);
6481                 }
6482             }
6483
6484         case KEY_chdir:
6485             /* may use HOME */
6486             (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
6487             UNI(OP_CHDIR);
6488
6489         case KEY_close:
6490             UNI(OP_CLOSE);
6491
6492         case KEY_closedir:
6493             UNI(OP_CLOSEDIR);
6494
6495         case KEY_cmp:
6496             Eop(OP_SCMP);
6497
6498         case KEY_caller:
6499             UNI(OP_CALLER);
6500
6501         case KEY_crypt:
6502 #ifdef FCRYPT
6503             if (!PL_cryptseen) {
6504                 PL_cryptseen = TRUE;
6505                 init_des();
6506             }
6507 #endif
6508             LOP(OP_CRYPT,XTERM);
6509
6510         case KEY_chmod:
6511             LOP(OP_CHMOD,XTERM);
6512
6513         case KEY_chown:
6514             LOP(OP_CHOWN,XTERM);
6515
6516         case KEY_connect:
6517             LOP(OP_CONNECT,XTERM);
6518
6519         case KEY_chr:
6520             UNI(OP_CHR);
6521
6522         case KEY_cos:
6523             UNI(OP_COS);
6524
6525         case KEY_chroot:
6526             UNI(OP_CHROOT);
6527
6528         case KEY_default:
6529             PREBLOCK(DEFAULT);
6530
6531         case KEY_do:
6532             s = SKIPSPACE1(s);
6533             if (*s == '{')
6534                 PRETERMBLOCK(DO);
6535             if (*s != '\'')
6536                 s = force_word(s,WORD,TRUE,TRUE,FALSE);
6537             if (orig_keyword == KEY_do) {
6538                 orig_keyword = 0;
6539                 pl_yylval.ival = 1;
6540             }
6541             else
6542                 pl_yylval.ival = 0;
6543             OPERATOR(DO);
6544
6545         case KEY_die:
6546             PL_hints |= HINT_BLOCK_SCOPE;
6547             LOP(OP_DIE,XTERM);
6548
6549         case KEY_defined:
6550             UNI(OP_DEFINED);
6551
6552         case KEY_delete:
6553             UNI(OP_DELETE);
6554
6555         case KEY_dbmopen:
6556             gv_fetchpvs("AnyDBM_File::ISA", GV_ADDMULTI, SVt_PVAV);
6557             LOP(OP_DBMOPEN,XTERM);
6558
6559         case KEY_dbmclose:
6560             UNI(OP_DBMCLOSE);
6561
6562         case KEY_dump:
6563             s = force_word(s,WORD,TRUE,FALSE,FALSE);
6564             LOOPX(OP_DUMP);
6565
6566         case KEY_else:
6567             PREBLOCK(ELSE);
6568
6569         case KEY_elsif:
6570             pl_yylval.ival = CopLINE(PL_curcop);
6571             OPERATOR(ELSIF);
6572
6573         case KEY_eq:
6574             Eop(OP_SEQ);
6575
6576         case KEY_exists:
6577             UNI(OP_EXISTS);
6578
6579         case KEY_exit:
6580             if (PL_madskills)
6581                 UNI(OP_INT);
6582             UNI(OP_EXIT);
6583
6584         case KEY_eval:
6585             s = SKIPSPACE1(s);
6586             if (*s == '{') { /* block eval */
6587                 PL_expect = XTERMBLOCK;
6588                 UNIBRACK(OP_ENTERTRY);
6589             }
6590             else { /* string eval */
6591                 PL_expect = XTERM;
6592                 UNIBRACK(OP_ENTEREVAL);
6593             }
6594
6595         case KEY_eof:
6596             UNI(OP_EOF);
6597
6598         case KEY_exp:
6599             UNI(OP_EXP);
6600
6601         case KEY_each:
6602             UNI(OP_EACH);
6603
6604         case KEY_exec:
6605             LOP(OP_EXEC,XREF);
6606
6607         case KEY_endhostent:
6608             FUN0(OP_EHOSTENT);
6609
6610         case KEY_endnetent:
6611             FUN0(OP_ENETENT);
6612
6613         case KEY_endservent:
6614             FUN0(OP_ESERVENT);
6615
6616         case KEY_endprotoent:
6617             FUN0(OP_EPROTOENT);
6618
6619         case KEY_endpwent:
6620             FUN0(OP_EPWENT);
6621
6622         case KEY_endgrent:
6623             FUN0(OP_EGRENT);
6624
6625         case KEY_for:
6626         case KEY_foreach:
6627             pl_yylval.ival = CopLINE(PL_curcop);
6628             s = SKIPSPACE1(s);
6629             if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
6630                 char *p = s;
6631 #ifdef PERL_MAD
6632                 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
6633 #endif
6634
6635                 if ((PL_bufend - p) >= 3 &&
6636                     strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
6637                     p += 2;
6638                 else if ((PL_bufend - p) >= 4 &&
6639                     strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
6640                     p += 3;
6641                 p = PEEKSPACE(p);
6642                 if (isIDFIRST_lazy_if(p,UTF)) {
6643                     p = scan_ident(p, PL_bufend,
6644                         PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
6645                     p = PEEKSPACE(p);
6646                 }
6647                 if (*p != '$')
6648                     Perl_croak(aTHX_ "Missing $ on loop variable");
6649 #ifdef PERL_MAD
6650                 s = SvPVX(PL_linestr) + soff;
6651 #endif
6652             }
6653             OPERATOR(FOR);
6654
6655         case KEY_formline:
6656             LOP(OP_FORMLINE,XTERM);
6657
6658         case KEY_fork:
6659             FUN0(OP_FORK);
6660
6661         case KEY_fcntl:
6662             LOP(OP_FCNTL,XTERM);
6663
6664         case KEY_fileno:
6665             UNI(OP_FILENO);
6666
6667         case KEY_flock:
6668             LOP(OP_FLOCK,XTERM);
6669
6670         case KEY_gt:
6671             Rop(OP_SGT);
6672
6673         case KEY_ge:
6674             Rop(OP_SGE);
6675
6676         case KEY_grep:
6677             LOP(OP_GREPSTART, XREF);
6678
6679         case KEY_goto:
6680             s = force_word(s,WORD,TRUE,FALSE,FALSE);
6681             LOOPX(OP_GOTO);
6682
6683         case KEY_gmtime:
6684             UNI(OP_GMTIME);
6685
6686         case KEY_getc:
6687             UNIDOR(OP_GETC);
6688
6689         case KEY_getppid:
6690             FUN0(OP_GETPPID);
6691
6692         case KEY_getpgrp:
6693             UNI(OP_GETPGRP);
6694
6695         case KEY_getpriority:
6696             LOP(OP_GETPRIORITY,XTERM);
6697
6698         case KEY_getprotobyname:
6699             UNI(OP_GPBYNAME);
6700
6701         case KEY_getprotobynumber:
6702             LOP(OP_GPBYNUMBER,XTERM);
6703
6704         case KEY_getprotoent:
6705             FUN0(OP_GPROTOENT);
6706
6707         case KEY_getpwent:
6708             FUN0(OP_GPWENT);
6709
6710         case KEY_getpwnam:
6711             UNI(OP_GPWNAM);
6712
6713         case KEY_getpwuid:
6714             UNI(OP_GPWUID);
6715
6716         case KEY_getpeername:
6717             UNI(OP_GETPEERNAME);
6718
6719         case KEY_gethostbyname:
6720             UNI(OP_GHBYNAME);
6721
6722         case KEY_gethostbyaddr:
6723             LOP(OP_GHBYADDR,XTERM);
6724
6725         case KEY_gethostent:
6726             FUN0(OP_GHOSTENT);
6727
6728         case KEY_getnetbyname:
6729             UNI(OP_GNBYNAME);
6730
6731         case KEY_getnetbyaddr:
6732             LOP(OP_GNBYADDR,XTERM);
6733
6734         case KEY_getnetent:
6735             FUN0(OP_GNETENT);
6736
6737         case KEY_getservbyname:
6738             LOP(OP_GSBYNAME,XTERM);
6739
6740         case KEY_getservbyport:
6741             LOP(OP_GSBYPORT,XTERM);
6742
6743         case KEY_getservent:
6744             FUN0(OP_GSERVENT);
6745
6746         case KEY_getsockname:
6747             UNI(OP_GETSOCKNAME);
6748
6749         case KEY_getsockopt:
6750             LOP(OP_GSOCKOPT,XTERM);
6751
6752         case KEY_getgrent:
6753             FUN0(OP_GGRENT);
6754
6755         case KEY_getgrnam:
6756             UNI(OP_GGRNAM);
6757
6758         case KEY_getgrgid:
6759             UNI(OP_GGRGID);
6760
6761         case KEY_getlogin:
6762             FUN0(OP_GETLOGIN);
6763
6764         case KEY_given:
6765             pl_yylval.ival = CopLINE(PL_curcop);
6766             OPERATOR(GIVEN);
6767
6768         case KEY_glob:
6769             LOP(OP_GLOB,XTERM);
6770
6771         case KEY_hex:
6772             UNI(OP_HEX);
6773
6774         case KEY_if:
6775             pl_yylval.ival = CopLINE(PL_curcop);
6776             OPERATOR(IF);
6777
6778         case KEY_index:
6779             LOP(OP_INDEX,XTERM);
6780
6781         case KEY_int:
6782             UNI(OP_INT);
6783
6784         case KEY_ioctl:
6785             LOP(OP_IOCTL,XTERM);
6786
6787         case KEY_join:
6788             LOP(OP_JOIN,XTERM);
6789
6790         case KEY_keys:
6791             UNI(OP_KEYS);
6792
6793         case KEY_kill:
6794             LOP(OP_KILL,XTERM);
6795
6796         case KEY_last:
6797             s = force_word(s,WORD,TRUE,FALSE,FALSE);
6798             LOOPX(OP_LAST);
6799
6800         case KEY_lc:
6801             UNI(OP_LC);
6802
6803         case KEY_lcfirst:
6804             UNI(OP_LCFIRST);
6805
6806         case KEY_local:
6807             pl_yylval.ival = 0;
6808             OPERATOR(LOCAL);
6809
6810         case KEY_length:
6811             UNI(OP_LENGTH);
6812
6813         case KEY_lt:
6814             Rop(OP_SLT);
6815
6816         case KEY_le:
6817             Rop(OP_SLE);
6818
6819         case KEY_localtime:
6820             UNI(OP_LOCALTIME);
6821
6822         case KEY_log:
6823             UNI(OP_LOG);
6824
6825         case KEY_link:
6826             LOP(OP_LINK,XTERM);
6827
6828         case KEY_listen:
6829             LOP(OP_LISTEN,XTERM);
6830
6831         case KEY_lock:
6832             UNI(OP_LOCK);
6833
6834         case KEY_lstat:
6835             UNI(OP_LSTAT);
6836
6837         case KEY_m:
6838             s = scan_pat(s,OP_MATCH);
6839             TERM(sublex_start());
6840
6841         case KEY_map:
6842             LOP(OP_MAPSTART, XREF);
6843
6844         case KEY_mkdir:
6845             LOP(OP_MKDIR,XTERM);
6846
6847         case KEY_msgctl:
6848             LOP(OP_MSGCTL,XTERM);
6849
6850         case KEY_msgget:
6851             LOP(OP_MSGGET,XTERM);
6852
6853         case KEY_msgrcv:
6854             LOP(OP_MSGRCV,XTERM);
6855
6856         case KEY_msgsnd:
6857             LOP(OP_MSGSND,XTERM);
6858
6859         case KEY_our:
6860         case KEY_my:
6861         case KEY_state:
6862             PL_in_my = (U16)tmp;
6863             s = SKIPSPACE1(s);
6864             if (isIDFIRST_lazy_if(s,UTF)) {
6865 #ifdef PERL_MAD
6866                 char* start = s;
6867 #endif
6868                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
6869                 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
6870                     goto really_sub;
6871                 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
6872                 if (!PL_in_my_stash) {
6873                     char tmpbuf[1024];
6874                     PL_bufptr = s;
6875                     my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
6876                     yyerror(tmpbuf);
6877                 }
6878 #ifdef PERL_MAD
6879                 if (PL_madskills) {     /* just add type to declarator token */
6880                     sv_catsv(PL_thistoken, PL_nextwhite);
6881                     PL_nextwhite = 0;
6882                     sv_catpvn(PL_thistoken, start, s - start);
6883                 }
6884 #endif
6885             }
6886             pl_yylval.ival = 1;
6887             OPERATOR(MY);
6888
6889         case KEY_next:
6890             s = force_word(s,WORD,TRUE,FALSE,FALSE);
6891             LOOPX(OP_NEXT);
6892
6893         case KEY_ne:
6894             Eop(OP_SNE);
6895
6896         case KEY_no:
6897             s = tokenize_use(0, s);
6898             OPERATOR(USE);
6899
6900         case KEY_not:
6901             if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
6902                 FUN1(OP_NOT);
6903             else
6904                 OPERATOR(NOTOP);
6905
6906         case KEY_open:
6907             s = SKIPSPACE1(s);
6908             if (isIDFIRST_lazy_if(s,UTF)) {
6909                 const char *t;
6910                 for (d = s; isALNUM_lazy_if(d,UTF);)
6911                     d++;
6912                 for (t=d; isSPACE(*t);)
6913                     t++;
6914                 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
6915                     /* [perl #16184] */
6916                     && !(t[0] == '=' && t[1] == '>')
6917                 ) {
6918                     int parms_len = (int)(d-s);
6919                     Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
6920                            "Precedence problem: open %.*s should be open(%.*s)",
6921                             parms_len, s, parms_len, s);
6922                 }
6923             }
6924             LOP(OP_OPEN,XTERM);
6925
6926         case KEY_or:
6927             pl_yylval.ival = OP_OR;
6928             OPERATOR(OROP);
6929
6930         case KEY_ord:
6931             UNI(OP_ORD);
6932
6933         case KEY_oct:
6934             UNI(OP_OCT);
6935
6936         case KEY_opendir:
6937             LOP(OP_OPEN_DIR,XTERM);
6938
6939         case KEY_print:
6940             checkcomma(s,PL_tokenbuf,"filehandle");
6941             LOP(OP_PRINT,XREF);
6942
6943         case KEY_printf:
6944             checkcomma(s,PL_tokenbuf,"filehandle");
6945             LOP(OP_PRTF,XREF);
6946
6947         case KEY_prototype:
6948             UNI(OP_PROTOTYPE);
6949
6950         case KEY_push:
6951             LOP(OP_PUSH,XTERM);
6952
6953         case KEY_pop:
6954             UNIDOR(OP_POP);
6955
6956         case KEY_pos:
6957             UNIDOR(OP_POS);
6958
6959         case KEY_pack:
6960             LOP(OP_PACK,XTERM);
6961
6962         case KEY_package:
6963             s = force_word(s,WORD,FALSE,TRUE,FALSE);
6964             s = force_version(s, FALSE);
6965             OPERATOR(PACKAGE);
6966
6967         case KEY_pipe:
6968             LOP(OP_PIPE_OP,XTERM);
6969
6970         case KEY_q:
6971             s = scan_str(s,!!PL_madskills,FALSE);
6972             if (!s)
6973                 missingterm(NULL);
6974             pl_yylval.ival = OP_CONST;
6975             TERM(sublex_start());
6976
6977         case KEY_quotemeta:
6978             UNI(OP_QUOTEMETA);
6979
6980         case KEY_qw:
6981             s = scan_str(s,!!PL_madskills,FALSE);
6982             if (!s)
6983                 missingterm(NULL);
6984             PL_expect = XOPERATOR;
6985             force_next(')');
6986             if (SvCUR(PL_lex_stuff)) {
6987                 OP *words = NULL;
6988                 int warned = 0;
6989                 d = SvPV_force(PL_lex_stuff, len);
6990                 while (len) {
6991                     for (; isSPACE(*d) && len; --len, ++d)
6992                         /**/;
6993                     if (len) {
6994                         SV *sv;
6995                         const char *b = d;
6996                         if (!warned && ckWARN(WARN_QW)) {
6997                             for (; !isSPACE(*d) && len; --len, ++d) {
6998                                 if (*d == ',') {
6999                                     Perl_warner(aTHX_ packWARN(WARN_QW),
7000                                         "Possible attempt to separate words with commas");
7001                                     ++warned;
7002                                 }
7003                                 else if (*d == '#') {
7004                                     Perl_warner(aTHX_ packWARN(WARN_QW),
7005                                         "Possible attempt to put comments in qw() list");
7006                                     ++warned;
7007                                 }
7008                             }
7009                         }
7010                         else {
7011                             for (; !isSPACE(*d) && len; --len, ++d)
7012                                 /**/;
7013                         }
7014                         sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
7015                         words = append_elem(OP_LIST, words,
7016                                             newSVOP(OP_CONST, 0, tokeq(sv)));
7017                     }
7018                 }
7019                 if (words) {
7020                     start_force(PL_curforce);
7021                     NEXTVAL_NEXTTOKE.opval = words;
7022                     force_next(THING);
7023                 }
7024             }
7025             if (PL_lex_stuff) {
7026                 SvREFCNT_dec(PL_lex_stuff);
7027                 PL_lex_stuff = NULL;
7028             }
7029             PL_expect = XTERM;
7030             TOKEN('(');
7031
7032         case KEY_qq:
7033             s = scan_str(s,!!PL_madskills,FALSE);
7034             if (!s)
7035                 missingterm(NULL);
7036             pl_yylval.ival = OP_STRINGIFY;
7037             if (SvIVX(PL_lex_stuff) == '\'')
7038                 SvIV_set(PL_lex_stuff, 0);      /* qq'$foo' should intepolate */
7039             TERM(sublex_start());
7040
7041         case KEY_qr:
7042             s = scan_pat(s,OP_QR);
7043             TERM(sublex_start());
7044
7045         case KEY_qx:
7046             s = scan_str(s,!!PL_madskills,FALSE);
7047             if (!s)
7048                 missingterm(NULL);
7049             readpipe_override();
7050             TERM(sublex_start());
7051
7052         case KEY_return:
7053             OLDLOP(OP_RETURN);
7054
7055         case KEY_require:
7056             s = SKIPSPACE1(s);
7057             if (isDIGIT(*s)) {
7058                 s = force_version(s, FALSE);
7059             }
7060             else if (*s != 'v' || !isDIGIT(s[1])
7061                     || (s = force_version(s, TRUE), *s == 'v'))
7062             {
7063                 *PL_tokenbuf = '\0';
7064                 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7065                 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
7066                     gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), GV_ADD);
7067                 else if (*s == '<')
7068                     yyerror("<> should be quotes");
7069             }
7070             if (orig_keyword == KEY_require) {
7071                 orig_keyword = 0;
7072                 pl_yylval.ival = 1;
7073             }
7074             else
7075                 pl_yylval.ival = 0;
7076             PL_expect = XTERM;
7077             PL_bufptr = s;
7078             PL_last_uni = PL_oldbufptr;
7079             PL_last_lop_op = OP_REQUIRE;
7080             s = skipspace(s);
7081             return REPORT( (int)REQUIRE );
7082
7083         case KEY_reset:
7084             UNI(OP_RESET);
7085
7086         case KEY_redo:
7087             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7088             LOOPX(OP_REDO);
7089
7090         case KEY_rename:
7091             LOP(OP_RENAME,XTERM);
7092
7093         case KEY_rand:
7094             UNI(OP_RAND);
7095
7096         case KEY_rmdir:
7097             UNI(OP_RMDIR);
7098
7099         case KEY_rindex:
7100             LOP(OP_RINDEX,XTERM);
7101
7102         case KEY_read:
7103             LOP(OP_READ,XTERM);
7104
7105         case KEY_readdir:
7106             UNI(OP_READDIR);
7107
7108         case KEY_readline:
7109             UNIDOR(OP_READLINE);
7110
7111         case KEY_readpipe:
7112             UNIDOR(OP_BACKTICK);
7113
7114         case KEY_rewinddir:
7115             UNI(OP_REWINDDIR);
7116
7117         case KEY_recv:
7118             LOP(OP_RECV,XTERM);
7119
7120         case KEY_reverse:
7121             LOP(OP_REVERSE,XTERM);
7122
7123         case KEY_readlink:
7124             UNIDOR(OP_READLINK);
7125
7126         case KEY_ref:
7127             UNI(OP_REF);
7128
7129         case KEY_s:
7130             s = scan_subst(s);
7131             if (pl_yylval.opval)
7132                 TERM(sublex_start());
7133             else
7134                 TOKEN(1);       /* force error */
7135
7136         case KEY_say:
7137             checkcomma(s,PL_tokenbuf,"filehandle");
7138             LOP(OP_SAY,XREF);
7139
7140         case KEY_chomp:
7141             UNI(OP_CHOMP);
7142
7143         case KEY_scalar:
7144             UNI(OP_SCALAR);
7145
7146         case KEY_select:
7147             LOP(OP_SELECT,XTERM);
7148
7149         case KEY_seek:
7150             LOP(OP_SEEK,XTERM);
7151
7152         case KEY_semctl:
7153             LOP(OP_SEMCTL,XTERM);
7154
7155         case KEY_semget:
7156             LOP(OP_SEMGET,XTERM);
7157
7158         case KEY_semop:
7159             LOP(OP_SEMOP,XTERM);
7160
7161         case KEY_send:
7162             LOP(OP_SEND,XTERM);
7163
7164         case KEY_setpgrp:
7165             LOP(OP_SETPGRP,XTERM);
7166
7167         case KEY_setpriority:
7168             LOP(OP_SETPRIORITY,XTERM);
7169
7170         case KEY_sethostent:
7171             UNI(OP_SHOSTENT);
7172
7173         case KEY_setnetent:
7174             UNI(OP_SNETENT);
7175
7176         case KEY_setservent:
7177             UNI(OP_SSERVENT);
7178
7179         case KEY_setprotoent:
7180             UNI(OP_SPROTOENT);
7181
7182         case KEY_setpwent:
7183             FUN0(OP_SPWENT);
7184
7185         case KEY_setgrent:
7186             FUN0(OP_SGRENT);
7187
7188         case KEY_seekdir:
7189             LOP(OP_SEEKDIR,XTERM);
7190
7191         case KEY_setsockopt:
7192             LOP(OP_SSOCKOPT,XTERM);
7193
7194         case KEY_shift:
7195             UNIDOR(OP_SHIFT);
7196
7197         case KEY_shmctl:
7198             LOP(OP_SHMCTL,XTERM);
7199
7200         case KEY_shmget:
7201             LOP(OP_SHMGET,XTERM);
7202
7203         case KEY_shmread:
7204             LOP(OP_SHMREAD,XTERM);
7205
7206         case KEY_shmwrite:
7207             LOP(OP_SHMWRITE,XTERM);
7208
7209         case KEY_shutdown:
7210             LOP(OP_SHUTDOWN,XTERM);
7211
7212         case KEY_sin:
7213             UNI(OP_SIN);
7214
7215         case KEY_sleep:
7216             UNI(OP_SLEEP);
7217
7218         case KEY_socket:
7219             LOP(OP_SOCKET,XTERM);
7220
7221         case KEY_socketpair:
7222             LOP(OP_SOCKPAIR,XTERM);
7223
7224         case KEY_sort:
7225             checkcomma(s,PL_tokenbuf,"subroutine name");
7226             s = SKIPSPACE1(s);
7227             if (*s == ';' || *s == ')')         /* probably a close */
7228                 Perl_croak(aTHX_ "sort is now a reserved word");
7229             PL_expect = XTERM;
7230             s = force_word(s,WORD,TRUE,TRUE,FALSE);
7231             LOP(OP_SORT,XREF);
7232
7233         case KEY_split:
7234             LOP(OP_SPLIT,XTERM);
7235
7236         case KEY_sprintf:
7237             LOP(OP_SPRINTF,XTERM);
7238
7239         case KEY_splice:
7240             LOP(OP_SPLICE,XTERM);
7241
7242         case KEY_sqrt:
7243             UNI(OP_SQRT);
7244
7245         case KEY_srand:
7246             UNI(OP_SRAND);
7247
7248         case KEY_stat:
7249             UNI(OP_STAT);
7250
7251         case KEY_study:
7252             UNI(OP_STUDY);
7253
7254         case KEY_substr:
7255             LOP(OP_SUBSTR,XTERM);
7256
7257         case KEY_format:
7258         case KEY_sub:
7259           really_sub:
7260             {
7261                 char tmpbuf[sizeof PL_tokenbuf];
7262                 SSize_t tboffset = 0;
7263                 expectation attrful;
7264                 bool have_name, have_proto;
7265                 const int key = tmp;
7266
7267 #ifdef PERL_MAD
7268                 SV *tmpwhite = 0;
7269
7270                 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7271                 SV *subtoken = newSVpvn(tstart, s - tstart);
7272                 PL_thistoken = 0;
7273
7274                 d = s;
7275                 s = SKIPSPACE2(s,tmpwhite);
7276 #else
7277                 s = skipspace(s);
7278 #endif
7279
7280                 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
7281                     (*s == ':' && s[1] == ':'))
7282                 {
7283 #ifdef PERL_MAD
7284                     SV *nametoke = NULL;
7285 #endif
7286
7287                     PL_expect = XBLOCK;
7288                     attrful = XATTRBLOCK;
7289                     /* remember buffer pos'n for later force_word */
7290                     tboffset = s - PL_oldbufptr;
7291                     d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
7292 #ifdef PERL_MAD
7293                     if (PL_madskills)
7294                         nametoke = newSVpvn(s, d - s);
7295 #endif
7296                     if (memchr(tmpbuf, ':', len))
7297                         sv_setpvn(PL_subname, tmpbuf, len);
7298                     else {
7299                         sv_setsv(PL_subname,PL_curstname);
7300                         sv_catpvs(PL_subname,"::");
7301                         sv_catpvn(PL_subname,tmpbuf,len);
7302                     }
7303                     have_name = TRUE;
7304
7305 #ifdef PERL_MAD
7306
7307                     start_force(0);
7308                     CURMAD('X', nametoke);
7309                     CURMAD('_', tmpwhite);
7310                     (void) force_word(PL_oldbufptr + tboffset, WORD,
7311                                       FALSE, TRUE, TRUE);
7312
7313                     s = SKIPSPACE2(d,tmpwhite);
7314 #else
7315                     s = skipspace(d);
7316 #endif
7317                 }
7318                 else {
7319                     if (key == KEY_my)
7320                         Perl_croak(aTHX_ "Missing name in \"my sub\"");
7321                     PL_expect = XTERMBLOCK;
7322                     attrful = XATTRTERM;
7323                     sv_setpvs(PL_subname,"?");
7324                     have_name = FALSE;
7325                 }
7326
7327                 if (key == KEY_format) {
7328                     if (*s == '=')
7329                         PL_lex_formbrack = PL_lex_brackets + 1;
7330 #ifdef PERL_MAD
7331                     PL_thistoken = subtoken;
7332                     s = d;
7333 #else
7334                     if (have_name)
7335                         (void) force_word(PL_oldbufptr + tboffset, WORD,
7336                                           FALSE, TRUE, TRUE);
7337 #endif
7338                     OPERATOR(FORMAT);
7339                 }
7340
7341                 /* Look for a prototype */
7342                 if (*s == '(') {
7343                     char *p;
7344                     bool bad_proto = FALSE;
7345                     bool in_brackets = FALSE;
7346                     char greedy_proto = ' ';
7347                     bool proto_after_greedy_proto = FALSE;
7348                     bool must_be_last = FALSE;
7349                     bool underscore = FALSE;
7350                     bool seen_underscore = FALSE;
7351                     const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
7352
7353                     s = scan_str(s,!!PL_madskills,FALSE);
7354                     if (!s)
7355                         Perl_croak(aTHX_ "Prototype not terminated");
7356                     /* strip spaces and check for bad characters */
7357                     d = SvPVX(PL_lex_stuff);
7358                     tmp = 0;
7359                     for (p = d; *p; ++p) {
7360                         if (!isSPACE(*p)) {
7361                             d[tmp++] = *p;
7362
7363                             if (warnillegalproto) {
7364                                 if (must_be_last)
7365                                     proto_after_greedy_proto = TRUE;
7366                                 if (!strchr("$@%*;[]&\\_", *p)) {
7367                                     bad_proto = TRUE;
7368                                 }
7369                                 else {
7370                                     if ( underscore ) {
7371                                         if ( *p != ';' )
7372                                             bad_proto = TRUE;
7373                                         underscore = FALSE;
7374                                     }
7375                                     if ( *p == '[' ) {
7376                                         in_brackets = TRUE;
7377                                     }
7378                                     else if ( *p == ']' ) {
7379                                         in_brackets = FALSE;
7380                                     }
7381                                     else if ( (*p == '@' || *p == '%') &&
7382                                          ( tmp < 2 || d[tmp-2] != '\\' ) &&
7383                                          !in_brackets ) {
7384                                         must_be_last = TRUE;
7385                                         greedy_proto = *p;
7386                                     }
7387                                     else if ( *p == '_' ) {
7388                                         underscore = seen_underscore = TRUE;
7389                                     }
7390                                 }
7391                             }
7392                         }
7393                     }
7394                     d[tmp] = '\0';
7395                     if (proto_after_greedy_proto)
7396                         Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
7397                                     "Prototype after '%c' for %"SVf" : %s",
7398                                     greedy_proto, SVfARG(PL_subname), d);
7399                     if (bad_proto)
7400                         Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
7401                                     "Illegal character %sin prototype for %"SVf" : %s",
7402                                     seen_underscore ? "after '_' " : "",
7403                                     SVfARG(PL_subname), d);
7404                     SvCUR_set(PL_lex_stuff, tmp);
7405                     have_proto = TRUE;
7406
7407 #ifdef PERL_MAD
7408                     start_force(0);
7409                     CURMAD('q', PL_thisopen);
7410                     CURMAD('_', tmpwhite);
7411                     CURMAD('=', PL_thisstuff);
7412                     CURMAD('Q', PL_thisclose);
7413                     NEXTVAL_NEXTTOKE.opval =
7414                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
7415                     PL_lex_stuff = NULL;
7416                     force_next(THING);
7417
7418                     s = SKIPSPACE2(s,tmpwhite);
7419 #else
7420                     s = skipspace(s);
7421 #endif
7422                 }
7423                 else
7424                     have_proto = FALSE;
7425
7426                 if (*s == ':' && s[1] != ':')
7427                     PL_expect = attrful;
7428                 else if (*s != '{' && key == KEY_sub) {
7429                     if (!have_name)
7430                         Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
7431                     else if (*s != ';' && *s != '}')
7432                         Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
7433                 }
7434
7435 #ifdef PERL_MAD
7436                 start_force(0);
7437                 if (tmpwhite) {
7438                     if (PL_madskills)
7439                         curmad('^', newSVpvs(""));
7440                     CURMAD('_', tmpwhite);
7441                 }
7442                 force_next(0);
7443
7444                 PL_thistoken = subtoken;
7445 #else
7446                 if (have_proto) {
7447                     NEXTVAL_NEXTTOKE.opval =
7448                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
7449                     PL_lex_stuff = NULL;
7450                     force_next(THING);
7451                 }
7452 #endif
7453                 if (!have_name) {
7454                     if (PL_curstash)
7455                         sv_setpvs(PL_subname, "__ANON__");
7456                     else
7457                         sv_setpvs(PL_subname, "__ANON__::__ANON__");
7458                     TOKEN(ANONSUB);
7459                 }
7460 #ifndef PERL_MAD
7461                 (void) force_word(PL_oldbufptr + tboffset, WORD,
7462                                   FALSE, TRUE, TRUE);
7463 #endif
7464                 if (key == KEY_my)
7465                     TOKEN(MYSUB);
7466                 TOKEN(SUB);
7467             }
7468
7469         case KEY_system:
7470             LOP(OP_SYSTEM,XREF);
7471
7472         case KEY_symlink:
7473             LOP(OP_SYMLINK,XTERM);
7474
7475         case KEY_syscall:
7476             LOP(OP_SYSCALL,XTERM);
7477
7478         case KEY_sysopen:
7479             LOP(OP_SYSOPEN,XTERM);
7480
7481         case KEY_sysseek:
7482             LOP(OP_SYSSEEK,XTERM);
7483
7484         case KEY_sysread:
7485             LOP(OP_SYSREAD,XTERM);
7486
7487         case KEY_syswrite:
7488             LOP(OP_SYSWRITE,XTERM);
7489
7490         case KEY_tr:
7491             s = scan_trans(s);
7492             TERM(sublex_start());
7493
7494         case KEY_tell:
7495             UNI(OP_TELL);
7496
7497         case KEY_telldir:
7498             UNI(OP_TELLDIR);
7499
7500         case KEY_tie:
7501             LOP(OP_TIE,XTERM);
7502
7503         case KEY_tied:
7504             UNI(OP_TIED);
7505
7506         case KEY_time:
7507             FUN0(OP_TIME);
7508
7509         case KEY_times:
7510             FUN0(OP_TMS);
7511
7512         case KEY_truncate:
7513             LOP(OP_TRUNCATE,XTERM);
7514
7515         case KEY_uc:
7516             UNI(OP_UC);
7517
7518         case KEY_ucfirst:
7519             UNI(OP_UCFIRST);
7520
7521         case KEY_untie:
7522             UNI(OP_UNTIE);
7523
7524         case KEY_until:
7525             pl_yylval.ival = CopLINE(PL_curcop);
7526             OPERATOR(UNTIL);
7527
7528         case KEY_unless:
7529             pl_yylval.ival = CopLINE(PL_curcop);
7530             OPERATOR(UNLESS);
7531
7532         case KEY_unlink:
7533             LOP(OP_UNLINK,XTERM);
7534
7535         case KEY_undef:
7536             UNIDOR(OP_UNDEF);
7537
7538         case KEY_unpack:
7539             LOP(OP_UNPACK,XTERM);
7540
7541         case KEY_utime:
7542             LOP(OP_UTIME,XTERM);
7543
7544         case KEY_umask:
7545             UNIDOR(OP_UMASK);
7546
7547         case KEY_unshift:
7548             LOP(OP_UNSHIFT,XTERM);
7549
7550         case KEY_use:
7551             s = tokenize_use(1, s);
7552             OPERATOR(USE);
7553
7554         case KEY_values:
7555             UNI(OP_VALUES);
7556
7557         case KEY_vec:
7558             LOP(OP_VEC,XTERM);
7559
7560         case KEY_when:
7561             pl_yylval.ival = CopLINE(PL_curcop);
7562             OPERATOR(WHEN);
7563
7564         case KEY_while:
7565             pl_yylval.ival = CopLINE(PL_curcop);
7566             OPERATOR(WHILE);
7567
7568         case KEY_warn:
7569             PL_hints |= HINT_BLOCK_SCOPE;
7570             LOP(OP_WARN,XTERM);
7571
7572         case KEY_wait:
7573             FUN0(OP_WAIT);
7574
7575         case KEY_waitpid:
7576             LOP(OP_WAITPID,XTERM);
7577
7578         case KEY_wantarray:
7579             FUN0(OP_WANTARRAY);
7580
7581         case KEY_write:
7582 #ifdef EBCDIC
7583         {
7584             char ctl_l[2];
7585             ctl_l[0] = toCTRL('L');
7586             ctl_l[1] = '\0';
7587             gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
7588         }
7589 #else
7590             /* Make sure $^L is defined */
7591             gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
7592 #endif
7593             UNI(OP_ENTERWRITE);
7594
7595         case KEY_x:
7596             if (PL_expect == XOPERATOR)
7597                 Mop(OP_REPEAT);
7598             check_uni();
7599             goto just_a_word;
7600
7601         case KEY_xor:
7602             pl_yylval.ival = OP_XOR;
7603             OPERATOR(OROP);
7604
7605         case KEY_y:
7606             s = scan_trans(s);
7607             TERM(sublex_start());
7608         }
7609     }}
7610 }
7611 #ifdef __SC__
7612 #pragma segment Main
7613 #endif
7614
7615 static int
7616 S_pending_ident(pTHX)
7617 {
7618     dVAR;
7619     register char *d;
7620     PADOFFSET tmp = 0;
7621     /* pit holds the identifier we read and pending_ident is reset */
7622     char pit = PL_pending_ident;
7623     const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
7624     /* All routes through this function want to know if there is a colon.  */
7625     const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
7626     PL_pending_ident = 0;
7627
7628     /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
7629     DEBUG_T({ PerlIO_printf(Perl_debug_log,
7630           "### Pending identifier '%s'\n", PL_tokenbuf); });
7631
7632     /* if we're in a my(), we can't allow dynamics here.
7633        $foo'bar has already been turned into $foo::bar, so
7634        just check for colons.
7635
7636        if it's a legal name, the OP is a PADANY.
7637     */
7638     if (PL_in_my) {
7639         if (PL_in_my == KEY_our) {      /* "our" is merely analogous to "my" */
7640             if (has_colon)
7641                 yyerror(Perl_form(aTHX_ "No package name allowed for "
7642                                   "variable %s in \"our\"",
7643                                   PL_tokenbuf));
7644             tmp = allocmy(PL_tokenbuf, tokenbuf_len, 0);
7645         }
7646         else {
7647             if (has_colon)
7648                 yyerror(Perl_form(aTHX_ PL_no_myglob,
7649                             PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf));
7650
7651             pl_yylval.opval = newOP(OP_PADANY, 0);
7652             pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len, 0);
7653             return PRIVATEREF;
7654         }
7655     }
7656
7657     /*
7658        build the ops for accesses to a my() variable.
7659
7660        Deny my($a) or my($b) in a sort block, *if* $a or $b is
7661        then used in a comparison.  This catches most, but not
7662        all cases.  For instance, it catches
7663            sort { my($a); $a <=> $b }
7664        but not
7665            sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
7666        (although why you'd do that is anyone's guess).
7667     */
7668
7669     if (!has_colon) {
7670         if (!PL_in_my)
7671             tmp = pad_findmy(PL_tokenbuf, tokenbuf_len, 0);
7672         if (tmp != NOT_IN_PAD) {
7673             /* might be an "our" variable" */
7674             if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
7675                 /* build ops for a bareword */
7676                 HV *  const stash = PAD_COMPNAME_OURSTASH(tmp);
7677                 HEK * const stashname = HvNAME_HEK(stash);
7678                 SV *  const sym = newSVhek(stashname);
7679                 sv_catpvs(sym, "::");
7680                 sv_catpvn(sym, PL_tokenbuf+1, tokenbuf_len - 1);
7681                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
7682                 pl_yylval.opval->op_private = OPpCONST_ENTERED;
7683                 gv_fetchsv(sym,
7684                     (PL_in_eval
7685                         ? (GV_ADDMULTI | GV_ADDINEVAL)
7686                         : GV_ADDMULTI
7687                     ),
7688                     ((PL_tokenbuf[0] == '$') ? SVt_PV
7689                      : (PL_tokenbuf[0] == '@') ? SVt_PVAV
7690                      : SVt_PVHV));
7691                 return WORD;
7692             }
7693
7694             /* if it's a sort block and they're naming $a or $b */
7695             if (PL_last_lop_op == OP_SORT &&
7696                 PL_tokenbuf[0] == '$' &&
7697                 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
7698                 && !PL_tokenbuf[2])
7699             {
7700                 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
7701                      d < PL_bufend && *d != '\n';
7702                      d++)
7703                 {
7704                     if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
7705                         Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
7706                               PL_tokenbuf);
7707                     }
7708                 }
7709             }
7710
7711             pl_yylval.opval = newOP(OP_PADANY, 0);
7712             pl_yylval.opval->op_targ = tmp;
7713             return PRIVATEREF;
7714         }
7715     }
7716
7717     /*
7718        Whine if they've said @foo in a doublequoted string,
7719        and @foo isn't a variable we can find in the symbol
7720        table.
7721     */
7722     if (ckWARN(WARN_AMBIGUOUS) &&
7723         pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
7724         GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1, 0,
7725                                          SVt_PVAV);
7726         if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
7727                 /* DO NOT warn for @- and @+ */
7728                 && !( PL_tokenbuf[2] == '\0' &&
7729                     ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
7730            )
7731         {
7732             /* Downgraded from fatal to warning 20000522 mjd */
7733             Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
7734                         "Possible unintended interpolation of %s in string",
7735                         PL_tokenbuf);
7736         }
7737     }
7738
7739     /* build ops for a bareword */
7740     pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn(PL_tokenbuf + 1,
7741                                                       tokenbuf_len - 1));
7742     pl_yylval.opval->op_private = OPpCONST_ENTERED;
7743     gv_fetchpvn_flags(
7744             PL_tokenbuf + 1, tokenbuf_len - 1,
7745             /* If the identifier refers to a stash, don't autovivify it.
7746              * Change 24660 had the side effect of causing symbol table
7747              * hashes to always be defined, even if they were freshly
7748              * created and the only reference in the entire program was
7749              * the single statement with the defined %foo::bar:: test.
7750              * It appears that all code in the wild doing this actually
7751              * wants to know whether sub-packages have been loaded, so
7752              * by avoiding auto-vivifying symbol tables, we ensure that
7753              * defined %foo::bar:: continues to be false, and the existing
7754              * tests still give the expected answers, even though what
7755              * they're actually testing has now changed subtly.
7756              */
7757             (*PL_tokenbuf == '%'
7758              && *(d = PL_tokenbuf + tokenbuf_len - 1) == ':'
7759              && d[-1] == ':'
7760              ? 0
7761              : PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD),
7762             ((PL_tokenbuf[0] == '$') ? SVt_PV
7763              : (PL_tokenbuf[0] == '@') ? SVt_PVAV
7764              : SVt_PVHV));
7765     return WORD;
7766 }
7767
7768 /*
7769  *  The following code was generated by perl_keyword.pl.
7770  */
7771
7772 I32
7773 Perl_keyword (pTHX_ const char *name, I32 len, bool all_keywords)
7774 {
7775     dVAR;
7776
7777     PERL_ARGS_ASSERT_KEYWORD;
7778
7779   switch (len)
7780   {
7781     case 1: /* 5 tokens of length 1 */
7782       switch (name[0])
7783       {
7784         case 'm':
7785           {                                       /* m          */
7786             return KEY_m;
7787           }
7788
7789         case 'q':
7790           {                                       /* q          */
7791             return KEY_q;
7792           }
7793
7794         case 's':
7795           {                                       /* s          */
7796             return KEY_s;
7797           }
7798
7799         case 'x':
7800           {                                       /* x          */
7801             return -KEY_x;
7802           }
7803
7804         case 'y':
7805           {                                       /* y          */
7806             return KEY_y;
7807           }
7808
7809         default:
7810           goto unknown;
7811       }
7812
7813     case 2: /* 18 tokens of length 2 */
7814       switch (name[0])
7815       {
7816         case 'd':
7817           if (name[1] == 'o')
7818           {                                       /* do         */
7819             return KEY_do;
7820           }
7821
7822           goto unknown;
7823
7824         case 'e':
7825           if (name[1] == 'q')
7826           {                                       /* eq         */
7827             return -KEY_eq;
7828           }
7829
7830           goto unknown;
7831
7832         case 'g':
7833           switch (name[1])
7834           {
7835             case 'e':
7836               {                                   /* ge         */
7837                 return -KEY_ge;
7838               }
7839
7840             case 't':
7841               {                                   /* gt         */
7842                 return -KEY_gt;
7843               }
7844
7845             default:
7846               goto unknown;
7847           }
7848
7849         case 'i':
7850           if (name[1] == 'f')
7851           {                                       /* if         */
7852             return KEY_if;
7853           }
7854
7855           goto unknown;
7856
7857         case 'l':
7858           switch (name[1])
7859           {
7860             case 'c':
7861               {                                   /* lc         */
7862                 return -KEY_lc;
7863               }
7864
7865             case 'e':
7866               {                                   /* le         */
7867                 return -KEY_le;
7868               }
7869
7870             case 't':
7871               {                                   /* lt         */
7872                 return -KEY_lt;
7873               }
7874
7875             default:
7876               goto unknown;
7877           }
7878
7879         case 'm':
7880           if (name[1] == 'y')
7881           {                                       /* my         */
7882             return KEY_my;
7883           }
7884
7885           goto unknown;
7886
7887         case 'n':
7888           switch (name[1])
7889           {
7890             case 'e':
7891               {                                   /* ne         */
7892                 return -KEY_ne;
7893               }
7894
7895             case 'o':
7896               {                                   /* no         */
7897                 return KEY_no;
7898               }
7899
7900             default:
7901               goto unknown;
7902           }
7903
7904         case 'o':
7905           if (name[1] == 'r')
7906           {                                       /* or         */
7907             return -KEY_or;
7908           }
7909
7910           goto unknown;
7911
7912         case 'q':
7913           switch (name[1])
7914           {
7915             case 'q':
7916               {                                   /* qq         */
7917                 return KEY_qq;
7918               }
7919
7920             case 'r':
7921               {                                   /* qr         */
7922                 return KEY_qr;
7923               }
7924
7925             case 'w':
7926               {                                   /* qw         */
7927                 return KEY_qw;
7928               }
7929
7930             case 'x':
7931               {                                   /* qx         */
7932                 return KEY_qx;
7933               }
7934
7935             default:
7936               goto unknown;
7937           }
7938
7939         case 't':
7940           if (name[1] == 'r')
7941           {                                       /* tr         */
7942             return KEY_tr;
7943           }
7944
7945           goto unknown;
7946
7947         case 'u':
7948           if (name[1] == 'c')
7949           {                                       /* uc         */
7950             return -KEY_uc;
7951           }
7952
7953           goto unknown;
7954
7955         default:
7956           goto unknown;
7957       }
7958
7959     case 3: /* 29 tokens of length 3 */
7960       switch (name[0])
7961       {
7962         case 'E':
7963           if (name[1] == 'N' &&
7964               name[2] == 'D')
7965           {                                       /* END        */
7966             return KEY_END;
7967           }
7968
7969           goto unknown;
7970
7971         case 'a':
7972           switch (name[1])
7973           {
7974             case 'b':
7975               if (name[2] == 's')
7976               {                                   /* abs        */
7977                 return -KEY_abs;
7978               }
7979
7980               goto unknown;
7981
7982             case 'n':
7983               if (name[2] == 'd')
7984               {                                   /* and        */
7985                 return -KEY_and;
7986               }
7987
7988               goto unknown;
7989
7990             default:
7991               goto unknown;
7992           }
7993
7994         case 'c':
7995           switch (name[1])
7996           {
7997             case 'h':
7998               if (name[2] == 'r')
7999               {                                   /* chr        */
8000                 return -KEY_chr;
8001               }
8002
8003               goto unknown;
8004
8005             case 'm':
8006               if (name[2] == 'p')
8007               {                                   /* cmp        */
8008                 return -KEY_cmp;
8009               }
8010
8011               goto unknown;
8012
8013             case 'o':
8014               if (name[2] == 's')
8015               {                                   /* cos        */
8016                 return -KEY_cos;
8017               }
8018
8019               goto unknown;
8020
8021             default:
8022               goto unknown;
8023           }
8024
8025         case 'd':
8026           if (name[1] == 'i' &&
8027               name[2] == 'e')
8028           {                                       /* die        */
8029             return -KEY_die;
8030           }
8031
8032           goto unknown;
8033
8034         case 'e':
8035           switch (name[1])
8036           {
8037             case 'o':
8038               if (name[2] == 'f')
8039               {                                   /* eof        */
8040                 return -KEY_eof;
8041               }
8042
8043               goto unknown;
8044
8045             case 'x':
8046               if (name[2] == 'p')
8047               {                                   /* exp        */
8048                 return -KEY_exp;
8049               }
8050
8051               goto unknown;
8052
8053             default:
8054               goto unknown;
8055           }
8056
8057         case 'f':
8058           if (name[1] == 'o' &&
8059               name[2] == 'r')
8060           {                                       /* for        */
8061             return KEY_for;
8062           }
8063
8064           goto unknown;
8065
8066         case 'h':
8067           if (name[1] == 'e' &&
8068               name[2] == 'x')
8069           {                                       /* hex        */
8070             return -KEY_hex;
8071           }
8072
8073           goto unknown;
8074
8075         case 'i':
8076           if (name[1] == 'n' &&
8077               name[2] == 't')
8078           {                                       /* int        */
8079             return -KEY_int;
8080           }
8081
8082           goto unknown;
8083
8084         case 'l':
8085           if (name[1] == 'o' &&
8086               name[2] == 'g')
8087           {                                       /* log        */
8088             return -KEY_log;
8089           }
8090
8091           goto unknown;
8092
8093         case 'm':
8094           if (name[1] == 'a' &&
8095               name[2] == 'p')
8096           {                                       /* map        */
8097             return KEY_map;
8098           }
8099
8100           goto unknown;
8101
8102         case 'n':
8103           if (name[1] == 'o' &&
8104               name[2] == 't')
8105           {                                       /* not        */
8106             return -KEY_not;
8107           }
8108
8109           goto unknown;
8110
8111         case 'o':
8112           switch (name[1])
8113           {
8114             case 'c':
8115               if (name[2] == 't')
8116               {                                   /* oct        */
8117                 return -KEY_oct;
8118               }
8119
8120               goto unknown;
8121
8122             case 'r':
8123               if (name[2] == 'd')
8124               {                                   /* ord        */
8125                 return -KEY_ord;
8126               }
8127
8128               goto unknown;
8129
8130             case 'u':
8131               if (name[2] == 'r')
8132               {                                   /* our        */
8133                 return KEY_our;
8134               }
8135
8136               goto unknown;
8137
8138             default:
8139               goto unknown;
8140           }
8141
8142         case 'p':
8143           if (name[1] == 'o')
8144           {
8145             switch (name[2])
8146             {
8147               case 'p':
8148                 {                                 /* pop        */
8149                   return -KEY_pop;
8150                 }
8151
8152               case 's':
8153                 {                                 /* pos        */
8154                   return KEY_pos;
8155                 }
8156
8157               default:
8158                 goto unknown;
8159             }
8160           }
8161
8162           goto unknown;
8163
8164         case 'r':
8165           if (name[1] == 'e' &&
8166               name[2] == 'f')
8167           {                                       /* ref        */
8168             return -KEY_ref;
8169           }
8170
8171           goto unknown;
8172
8173         case 's':
8174           switch (name[1])
8175           {
8176             case 'a':
8177               if (name[2] == 'y')
8178               {                                   /* say        */
8179                 return (all_keywords || FEATURE_IS_ENABLED("say") ? KEY_say : 0);
8180               }
8181
8182               goto unknown;
8183
8184             case 'i':
8185               if (name[2] == 'n')
8186               {                                   /* sin        */
8187                 return -KEY_sin;
8188               }
8189
8190               goto unknown;
8191
8192             case 'u':
8193               if (name[2] == 'b')
8194               {                                   /* sub        */
8195                 return KEY_sub;
8196               }
8197
8198               goto unknown;
8199
8200             default:
8201               goto unknown;
8202           }
8203
8204         case 't':
8205           if (name[1] == 'i' &&
8206               name[2] == 'e')
8207           {                                       /* tie        */
8208             return KEY_tie;
8209           }
8210
8211           goto unknown;
8212
8213         case 'u':
8214           if (name[1] == 's' &&
8215               name[2] == 'e')
8216           {                                       /* use        */
8217             return KEY_use;
8218           }
8219
8220           goto unknown;
8221
8222         case 'v':
8223           if (name[1] == 'e' &&
8224               name[2] == 'c')
8225           {                                       /* vec        */
8226             return -KEY_vec;
8227           }
8228
8229           goto unknown;
8230
8231         case 'x':
8232           if (name[1] == 'o' &&
8233               name[2] == 'r')
8234           {                                       /* xor        */
8235             return -KEY_xor;
8236           }
8237
8238           goto unknown;
8239
8240         default:
8241           goto unknown;
8242       }
8243
8244     case 4: /* 41 tokens of length 4 */
8245       switch (name[0])
8246       {
8247         case 'C':
8248           if (name[1] == 'O' &&
8249               name[2] == 'R' &&
8250               name[3] == 'E')
8251           {                                       /* CORE       */
8252             return -KEY_CORE;
8253           }
8254
8255           goto unknown;
8256
8257         case 'I':
8258           if (name[1] == 'N' &&
8259               name[2] == 'I' &&
8260               name[3] == 'T')
8261           {                                       /* INIT       */
8262             return KEY_INIT;
8263           }
8264
8265           goto unknown;
8266
8267         case 'b':
8268           if (name[1] == 'i' &&
8269               name[2] == 'n' &&
8270               name[3] == 'd')
8271           {                                       /* bind       */
8272             return -KEY_bind;
8273           }
8274
8275           goto unknown;
8276
8277         case 'c':
8278           if (name[1] == 'h' &&
8279               name[2] == 'o' &&
8280               name[3] == 'p')
8281           {                                       /* chop       */
8282             return -KEY_chop;
8283           }
8284
8285           goto unknown;
8286
8287         case 'd':
8288           if (name[1] == 'u' &&
8289               name[2] == 'm' &&
8290               name[3] == 'p')
8291           {                                       /* dump       */
8292             return -KEY_dump;
8293           }
8294
8295           goto unknown;
8296
8297         case 'e':
8298           switch (name[1])
8299           {
8300             case 'a':
8301               if (name[2] == 'c' &&
8302                   name[3] == 'h')
8303               {                                   /* each       */
8304                 return -KEY_each;
8305               }
8306
8307               goto unknown;
8308
8309             case 'l':
8310               if (name[2] == 's' &&
8311                   name[3] == 'e')
8312               {                                   /* else       */
8313                 return KEY_else;
8314               }
8315
8316               goto unknown;
8317
8318             case 'v':
8319               if (name[2] == 'a' &&
8320                   name[3] == 'l')
8321               {                                   /* eval       */
8322                 return KEY_eval;
8323               }
8324
8325               goto unknown;
8326
8327             case 'x':
8328               switch (name[2])
8329               {
8330                 case 'e':
8331                   if (name[3] == 'c')
8332                   {                               /* exec       */
8333                     return -KEY_exec;
8334                   }
8335
8336                   goto unknown;
8337
8338                 case 'i':
8339                   if (name[3] == 't')
8340                   {                               /* exit       */
8341                     return -KEY_exit;
8342                   }
8343
8344                   goto unknown;
8345
8346                 default:
8347                   goto unknown;
8348               }
8349
8350             default:
8351               goto unknown;
8352           }
8353
8354         case 'f':
8355           if (name[1] == 'o' &&
8356               name[2] == 'r' &&
8357               name[3] == 'k')
8358           {                                       /* fork       */
8359             return -KEY_fork;
8360           }
8361
8362           goto unknown;
8363
8364         case 'g':
8365           switch (name[1])
8366           {
8367             case 'e':
8368               if (name[2] == 't' &&
8369                   name[3] == 'c')
8370               {                                   /* getc       */
8371                 return -KEY_getc;
8372               }
8373
8374               goto unknown;
8375
8376             case 'l':
8377               if (name[2] == 'o' &&
8378                   name[3] == 'b')
8379               {                                   /* glob       */
8380                 return KEY_glob;
8381               }
8382
8383               goto unknown;
8384
8385             case 'o':
8386               if (name[2] == 't' &&
8387                   name[3] == 'o')
8388               {                                   /* goto       */
8389                 return KEY_goto;
8390               }
8391
8392               goto unknown;
8393
8394             case 'r':
8395               if (name[2] == 'e' &&
8396                   name[3] == 'p')
8397               {                                   /* grep       */
8398                 return KEY_grep;
8399               }
8400
8401               goto unknown;
8402
8403             default:
8404               goto unknown;
8405           }
8406
8407         case 'j':
8408           if (name[1] == 'o' &&
8409               name[2] == 'i' &&
8410               name[3] == 'n')
8411           {                                       /* join       */
8412             return -KEY_join;
8413           }
8414
8415           goto unknown;
8416
8417         case 'k':
8418           switch (name[1])
8419           {
8420             case 'e':
8421               if (name[2] == 'y' &&
8422                   name[3] == 's')
8423               {                                   /* keys       */
8424                 return -KEY_keys;
8425               }
8426
8427               goto unknown;
8428
8429             case 'i':
8430               if (name[2] == 'l' &&
8431                   name[3] == 'l')
8432               {                                   /* kill       */
8433                 return -KEY_kill;
8434               }
8435
8436               goto unknown;
8437
8438             default:
8439               goto unknown;
8440           }
8441
8442         case 'l':
8443           switch (name[1])
8444           {
8445             case 'a':
8446               if (name[2] == 's' &&
8447                   name[3] == 't')
8448               {                                   /* last       */
8449                 return KEY_last;
8450               }
8451
8452               goto unknown;
8453
8454             case 'i':
8455               if (name[2] == 'n' &&
8456                   name[3] == 'k')
8457               {                                   /* link       */
8458                 return -KEY_link;
8459               }
8460
8461               goto unknown;
8462
8463             case 'o':
8464               if (name[2] == 'c' &&
8465                   name[3] == 'k')
8466               {                                   /* lock       */
8467                 return -KEY_lock;
8468               }
8469
8470               goto unknown;
8471
8472             default:
8473               goto unknown;
8474           }
8475
8476         case 'n':
8477           if (name[1] == 'e' &&
8478               name[2] == 'x' &&
8479               name[3] == 't')
8480           {                                       /* next       */
8481             return KEY_next;
8482           }
8483
8484           goto unknown;
8485
8486         case 'o':
8487           if (name[1] == 'p' &&
8488               name[2] == 'e' &&
8489               name[3] == 'n')
8490           {                                       /* open       */
8491             return -KEY_open;
8492           }
8493
8494           goto unknown;
8495
8496         case 'p':
8497           switch (name[1])
8498           {
8499             case 'a':
8500               if (name[2] == 'c' &&
8501                   name[3] == 'k')
8502               {                                   /* pack       */
8503                 return -KEY_pack;
8504               }
8505
8506               goto unknown;
8507
8508             case 'i':
8509               if (name[2] == 'p' &&
8510                   name[3] == 'e')
8511               {                                   /* pipe       */
8512                 return -KEY_pipe;
8513               }
8514
8515               goto unknown;
8516
8517             case 'u':
8518               if (name[2] == 's' &&
8519                   name[3] == 'h')
8520               {                                   /* push       */
8521                 return -KEY_push;
8522               }
8523
8524               goto unknown;
8525
8526             default:
8527               goto unknown;
8528           }
8529
8530         case 'r':
8531           switch (name[1])
8532           {
8533             case 'a':
8534               if (name[2] == 'n' &&
8535                   name[3] == 'd')
8536               {                                   /* rand       */
8537                 return -KEY_rand;
8538               }
8539
8540               goto unknown;
8541
8542             case 'e':
8543               switch (name[2])
8544               {
8545                 case 'a':
8546                   if (name[3] == 'd')
8547                   {                               /* read       */
8548                     return -KEY_read;
8549                   }
8550
8551                   goto unknown;
8552
8553                 case 'c':
8554                   if (name[3] == 'v')
8555                   {                               /* recv       */
8556                     return -KEY_recv;
8557                   }
8558
8559                   goto unknown;
8560
8561                 case 'd':
8562                   if (name[3] == 'o')
8563                   {                               /* redo       */
8564                     return KEY_redo;
8565                   }
8566
8567                   goto unknown;
8568
8569                 default:
8570                   goto unknown;
8571               }
8572
8573             default:
8574               goto unknown;
8575           }
8576
8577         case 's':
8578           switch (name[1])
8579           {
8580             case 'e':
8581               switch (name[2])
8582               {
8583                 case 'e':
8584                   if (name[3] == 'k')
8585                   {                               /* seek       */
8586                     return -KEY_seek;
8587                   }
8588
8589                   goto unknown;
8590
8591                 case 'n':
8592                   if (name[3] == 'd')
8593                   {                               /* send       */
8594                     return -KEY_send;
8595                   }
8596
8597                   goto unknown;
8598
8599                 default:
8600                   goto unknown;
8601               }
8602
8603             case 'o':
8604               if (name[2] == 'r' &&
8605                   name[3] == 't')
8606               {                                   /* sort       */
8607                 return KEY_sort;
8608               }
8609
8610               goto unknown;
8611
8612             case 'q':
8613               if (name[2] == 'r' &&
8614                   name[3] == 't')
8615               {                                   /* sqrt       */
8616                 return -KEY_sqrt;
8617               }
8618
8619               goto unknown;
8620
8621             case 't':
8622               if (name[2] == 'a' &&
8623                   name[3] == 't')
8624               {                                   /* stat       */
8625                 return -KEY_stat;
8626               }
8627
8628               goto unknown;
8629
8630             default:
8631               goto unknown;
8632           }
8633
8634         case 't':
8635           switch (name[1])
8636           {
8637             case 'e':
8638               if (name[2] == 'l' &&
8639                   name[3] == 'l')
8640               {                                   /* tell       */
8641                 return -KEY_tell;
8642               }
8643
8644               goto unknown;
8645
8646             case 'i':
8647               switch (name[2])
8648               {
8649                 case 'e':
8650                   if (name[3] == 'd')
8651                   {                               /* tied       */
8652                     return KEY_tied;
8653                   }
8654
8655                   goto unknown;
8656
8657                 case 'm':
8658                   if (name[3] == 'e')
8659                   {                               /* time       */
8660                     return -KEY_time;
8661                   }
8662
8663                   goto unknown;
8664
8665                 default:
8666                   goto unknown;
8667               }
8668
8669             default:
8670               goto unknown;
8671           }
8672
8673         case 'w':
8674           switch (name[1])
8675           {
8676             case 'a':
8677               switch (name[2])
8678               {
8679                 case 'i':
8680                   if (name[3] == 't')
8681                   {                               /* wait       */
8682                     return -KEY_wait;
8683                   }
8684
8685                   goto unknown;
8686
8687                 case 'r':
8688                   if (name[3] == 'n')
8689                   {                               /* warn       */
8690                     return -KEY_warn;
8691                   }
8692
8693                   goto unknown;
8694
8695                 default:
8696                   goto unknown;
8697               }
8698
8699             case 'h':
8700               if (name[2] == 'e' &&
8701                   name[3] == 'n')
8702               {                                   /* when       */
8703                 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_when : 0);
8704               }
8705
8706               goto unknown;
8707
8708             default:
8709               goto unknown;
8710           }
8711
8712         default:
8713           goto unknown;
8714       }
8715
8716     case 5: /* 39 tokens of length 5 */
8717       switch (name[0])
8718       {
8719         case 'B':
8720           if (name[1] == 'E' &&
8721               name[2] == 'G' &&
8722               name[3] == 'I' &&
8723               name[4] == 'N')
8724           {                                       /* BEGIN      */
8725             return KEY_BEGIN;
8726           }
8727
8728           goto unknown;
8729
8730         case 'C':
8731           if (name[1] == 'H' &&
8732               name[2] == 'E' &&
8733               name[3] == 'C' &&
8734               name[4] == 'K')
8735           {                                       /* CHECK      */
8736             return KEY_CHECK;
8737           }
8738
8739           goto unknown;
8740
8741         case 'a':
8742           switch (name[1])
8743           {
8744             case 'l':
8745               if (name[2] == 'a' &&
8746                   name[3] == 'r' &&
8747                   name[4] == 'm')
8748               {                                   /* alarm      */
8749                 return -KEY_alarm;
8750               }
8751
8752               goto unknown;
8753
8754             case 't':
8755               if (name[2] == 'a' &&
8756                   name[3] == 'n' &&
8757                   name[4] == '2')
8758               {                                   /* atan2      */
8759                 return -KEY_atan2;
8760               }
8761
8762               goto unknown;
8763
8764             default:
8765               goto unknown;
8766           }
8767
8768         case 'b':
8769           switch (name[1])
8770           {
8771             case 'l':
8772               if (name[2] == 'e' &&
8773                   name[3] == 's' &&
8774                   name[4] == 's')
8775               {                                   /* bless      */
8776                 return -KEY_bless;
8777               }
8778
8779               goto unknown;
8780
8781             case 'r':
8782               if (name[2] == 'e' &&
8783                   name[3] == 'a' &&
8784                   name[4] == 'k')
8785               {                                   /* break      */
8786                 return (all_keywords || FEATURE_IS_ENABLED("switch") ? -KEY_break : 0);
8787               }
8788
8789               goto unknown;
8790
8791             default:
8792               goto unknown;
8793           }
8794
8795         case 'c':
8796           switch (name[1])
8797           {
8798             case 'h':
8799               switch (name[2])
8800               {
8801                 case 'd':
8802                   if (name[3] == 'i' &&
8803                       name[4] == 'r')
8804                   {                               /* chdir      */
8805                     return -KEY_chdir;
8806                   }
8807
8808                   goto unknown;
8809
8810                 case 'm':
8811                   if (name[3] == 'o' &&
8812                       name[4] == 'd')
8813                   {                               /* chmod      */
8814                     return -KEY_chmod;
8815                   }
8816
8817                   goto unknown;
8818
8819                 case 'o':
8820                   switch (name[3])
8821                   {
8822                     case 'm':
8823                       if (name[4] == 'p')
8824                       {                           /* chomp      */
8825                         return -KEY_chomp;
8826                       }
8827
8828                       goto unknown;
8829
8830                     case 'w':
8831                       if (name[4] == 'n')
8832                       {                           /* chown      */
8833                         return -KEY_chown;
8834                       }
8835
8836                       goto unknown;
8837
8838                     default:
8839                       goto unknown;
8840                   }
8841
8842                 default:
8843                   goto unknown;
8844               }
8845
8846             case 'l':
8847               if (name[2] == 'o' &&
8848                   name[3] == 's' &&
8849                   name[4] == 'e')
8850               {                                   /* close      */
8851                 return -KEY_close;
8852               }
8853
8854               goto unknown;
8855
8856             case 'r':
8857               if (name[2] == 'y' &&
8858                   name[3] == 'p' &&
8859                   name[4] == 't')
8860               {                                   /* crypt      */
8861                 return -KEY_crypt;
8862               }
8863
8864               goto unknown;
8865
8866             default:
8867               goto unknown;
8868           }
8869
8870         case 'e':
8871           if (name[1] == 'l' &&
8872               name[2] == 's' &&
8873               name[3] == 'i' &&
8874               name[4] == 'f')
8875           {                                       /* elsif      */
8876             return KEY_elsif;
8877           }
8878
8879           goto unknown;
8880
8881         case 'f':
8882           switch (name[1])
8883           {
8884             case 'c':
8885               if (name[2] == 'n' &&
8886                   name[3] == 't' &&
8887                   name[4] == 'l')
8888               {                                   /* fcntl      */
8889                 return -KEY_fcntl;
8890               }
8891
8892               goto unknown;
8893
8894             case 'l':
8895               if (name[2] == 'o' &&
8896                   name[3] == 'c' &&
8897                   name[4] == 'k')
8898               {                                   /* flock      */
8899                 return -KEY_flock;
8900               }
8901
8902               goto unknown;
8903
8904             default:
8905               goto unknown;
8906           }
8907
8908         case 'g':
8909           if (name[1] == 'i' &&
8910               name[2] == 'v' &&
8911               name[3] == 'e' &&
8912               name[4] == 'n')
8913           {                                       /* given      */
8914             return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_given : 0);
8915           }
8916
8917           goto unknown;
8918
8919         case 'i':
8920           switch (name[1])
8921           {
8922             case 'n':
8923               if (name[2] == 'd' &&
8924                   name[3] == 'e' &&
8925                   name[4] == 'x')
8926               {                                   /* index      */
8927                 return -KEY_index;
8928               }
8929
8930               goto unknown;
8931
8932             case 'o':
8933               if (name[2] == 'c' &&
8934                   name[3] == 't' &&
8935                   name[4] == 'l')
8936               {                                   /* ioctl      */
8937                 return -KEY_ioctl;
8938               }
8939
8940               goto unknown;
8941
8942             default:
8943               goto unknown;
8944           }
8945
8946         case 'l':
8947           switch (name[1])
8948           {
8949             case 'o':
8950               if (name[2] == 'c' &&
8951                   name[3] == 'a' &&
8952                   name[4] == 'l')
8953               {                                   /* local      */
8954                 return KEY_local;
8955               }
8956
8957               goto unknown;
8958
8959             case 's':
8960               if (name[2] == 't' &&
8961                   name[3] == 'a' &&
8962                   name[4] == 't')
8963               {                                   /* lstat      */
8964                 return -KEY_lstat;
8965               }
8966
8967               goto unknown;
8968
8969             default:
8970               goto unknown;
8971           }
8972
8973         case 'm':
8974           if (name[1] == 'k' &&
8975               name[2] == 'd' &&
8976               name[3] == 'i' &&
8977               name[4] == 'r')
8978           {                                       /* mkdir      */
8979             return -KEY_mkdir;
8980           }
8981
8982           goto unknown;
8983
8984         case 'p':
8985           if (name[1] == 'r' &&
8986               name[2] == 'i' &&
8987               name[3] == 'n' &&
8988               name[4] == 't')
8989           {                                       /* print      */
8990             return KEY_print;
8991           }
8992
8993           goto unknown;
8994
8995         case 'r':
8996           switch (name[1])
8997           {
8998             case 'e':
8999               if (name[2] == 's' &&
9000                   name[3] == 'e' &&
9001                   name[4] == 't')
9002               {                                   /* reset      */
9003                 return -KEY_reset;
9004               }
9005
9006               goto unknown;
9007
9008             case 'm':
9009               if (name[2] == 'd' &&
9010                   name[3] == 'i' &&
9011                   name[4] == 'r')
9012               {                                   /* rmdir      */
9013                 return -KEY_rmdir;
9014               }
9015
9016               goto unknown;
9017
9018             default:
9019               goto unknown;
9020           }
9021
9022         case 's':
9023           switch (name[1])
9024           {
9025             case 'e':
9026               if (name[2] == 'm' &&
9027                   name[3] == 'o' &&
9028                   name[4] == 'p')
9029               {                                   /* semop      */
9030                 return -KEY_semop;
9031               }
9032
9033               goto unknown;
9034
9035             case 'h':
9036               if (name[2] == 'i' &&
9037                   name[3] == 'f' &&
9038                   name[4] == 't')
9039               {                                   /* shift      */
9040                 return -KEY_shift;
9041               }
9042
9043               goto unknown;
9044
9045             case 'l':
9046               if (name[2] == 'e' &&
9047                   name[3] == 'e' &&
9048                   name[4] == 'p')
9049               {                                   /* sleep      */
9050                 return -KEY_sleep;
9051               }
9052
9053               goto unknown;
9054
9055             case 'p':
9056               if (name[2] == 'l' &&
9057                   name[3] == 'i' &&
9058                   name[4] == 't')
9059               {                                   /* split      */
9060                 return KEY_split;
9061               }
9062
9063               goto unknown;
9064
9065             case 'r':
9066               if (name[2] == 'a' &&
9067                   name[3] == 'n' &&
9068                   name[4] == 'd')
9069               {                                   /* srand      */
9070                 return -KEY_srand;
9071               }
9072
9073               goto unknown;
9074
9075             case 't':
9076               switch (name[2])
9077               {
9078                 case 'a':
9079                   if (name[3] == 't' &&
9080                       name[4] == 'e')
9081                   {                               /* state      */
9082                     return (all_keywords || FEATURE_IS_ENABLED("state") ? KEY_state : 0);
9083                   }
9084
9085                   goto unknown;
9086
9087                 case 'u':
9088                   if (name[3] == 'd' &&
9089                       name[4] == 'y')
9090                   {                               /* study      */
9091                     return KEY_study;
9092                   }
9093
9094                   goto unknown;
9095
9096                 default:
9097                   goto unknown;
9098               }
9099
9100             default:
9101               goto unknown;
9102           }
9103
9104         case 't':
9105           if (name[1] == 'i' &&
9106               name[2] == 'm' &&
9107               name[3] == 'e' &&
9108               name[4] == 's')
9109           {                                       /* times      */
9110             return -KEY_times;
9111           }
9112
9113           goto unknown;
9114
9115         case 'u':
9116           switch (name[1])
9117           {
9118             case 'm':
9119               if (name[2] == 'a' &&
9120                   name[3] == 's' &&
9121                   name[4] == 'k')
9122               {                                   /* umask      */
9123                 return -KEY_umask;
9124               }
9125
9126               goto unknown;
9127
9128             case 'n':
9129               switch (name[2])
9130               {
9131                 case 'd':
9132                   if (name[3] == 'e' &&
9133                       name[4] == 'f')
9134                   {                               /* undef      */
9135                     return KEY_undef;
9136                   }
9137
9138                   goto unknown;
9139
9140                 case 't':
9141                   if (name[3] == 'i')
9142                   {
9143                     switch (name[4])
9144                     {
9145                       case 'e':
9146                         {                         /* untie      */
9147                           return KEY_untie;
9148                         }
9149
9150                       case 'l':
9151                         {                         /* until      */
9152                           return KEY_until;
9153                         }
9154
9155                       default:
9156                         goto unknown;
9157                     }
9158                   }
9159
9160                   goto unknown;
9161
9162                 default:
9163                   goto unknown;
9164               }
9165
9166             case 't':
9167               if (name[2] == 'i' &&
9168                   name[3] == 'm' &&
9169                   name[4] == 'e')
9170               {                                   /* utime      */
9171                 return -KEY_utime;
9172               }
9173
9174               goto unknown;
9175
9176             default:
9177               goto unknown;
9178           }
9179
9180         case 'w':
9181           switch (name[1])
9182           {
9183             case 'h':
9184               if (name[2] == 'i' &&
9185                   name[3] == 'l' &&
9186                   name[4] == 'e')
9187               {                                   /* while      */
9188                 return KEY_while;
9189               }
9190
9191               goto unknown;
9192
9193             case 'r':
9194               if (name[2] == 'i' &&
9195                   name[3] == 't' &&
9196                   name[4] == 'e')
9197               {                                   /* write      */
9198                 return -KEY_write;
9199               }
9200
9201               goto unknown;
9202
9203             default:
9204               goto unknown;
9205           }
9206
9207         default:
9208           goto unknown;
9209       }
9210
9211     case 6: /* 33 tokens of length 6 */
9212       switch (name[0])
9213       {
9214         case 'a':
9215           if (name[1] == 'c' &&
9216               name[2] == 'c' &&
9217               name[3] == 'e' &&
9218               name[4] == 'p' &&
9219               name[5] == 't')
9220           {                                       /* accept     */
9221             return -KEY_accept;
9222           }
9223
9224           goto unknown;
9225
9226         case 'c':
9227           switch (name[1])
9228           {
9229             case 'a':
9230               if (name[2] == 'l' &&
9231                   name[3] == 'l' &&
9232                   name[4] == 'e' &&
9233                   name[5] == 'r')
9234               {                                   /* caller     */
9235                 return -KEY_caller;
9236               }
9237
9238               goto unknown;
9239
9240             case 'h':
9241               if (name[2] == 'r' &&
9242                   name[3] == 'o' &&
9243                   name[4] == 'o' &&
9244                   name[5] == 't')
9245               {                                   /* chroot     */
9246                 return -KEY_chroot;
9247               }
9248
9249               goto unknown;
9250
9251             default:
9252               goto unknown;
9253           }
9254
9255         case 'd':
9256           if (name[1] == 'e' &&
9257               name[2] == 'l' &&
9258               name[3] == 'e' &&
9259               name[4] == 't' &&
9260               name[5] == 'e')
9261           {                                       /* delete     */
9262             return KEY_delete;
9263           }
9264
9265           goto unknown;
9266
9267         case 'e':
9268           switch (name[1])
9269           {
9270             case 'l':
9271               if (name[2] == 's' &&
9272                   name[3] == 'e' &&
9273                   name[4] == 'i' &&
9274                   name[5] == 'f')
9275               {                                   /* elseif     */
9276                   Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif");
9277               }
9278
9279               goto unknown;
9280
9281             case 'x':
9282               if (name[2] == 'i' &&
9283                   name[3] == 's' &&
9284                   name[4] == 't' &&
9285                   name[5] == 's')
9286               {                                   /* exists     */
9287                 return KEY_exists;
9288               }
9289
9290               goto unknown;
9291
9292             default:
9293               goto unknown;
9294           }
9295
9296         case 'f':
9297           switch (name[1])
9298           {
9299             case 'i':
9300               if (name[2] == 'l' &&
9301                   name[3] == 'e' &&
9302                   name[4] == 'n' &&
9303                   name[5] == 'o')
9304               {                                   /* fileno     */
9305                 return -KEY_fileno;
9306               }
9307
9308               goto unknown;
9309
9310             case 'o':
9311               if (name[2] == 'r' &&
9312                   name[3] == 'm' &&
9313                   name[4] == 'a' &&
9314                   name[5] == 't')
9315               {                                   /* format     */
9316                 return KEY_format;
9317               }
9318
9319               goto unknown;
9320
9321             default:
9322               goto unknown;
9323           }
9324
9325         case 'g':
9326           if (name[1] == 'm' &&
9327               name[2] == 't' &&
9328               name[3] == 'i' &&
9329               name[4] == 'm' &&
9330               name[5] == 'e')
9331           {                                       /* gmtime     */
9332             return -KEY_gmtime;
9333           }
9334
9335           goto unknown;
9336
9337         case 'l':
9338           switch (name[1])
9339           {
9340             case 'e':
9341               if (name[2] == 'n' &&
9342                   name[3] == 'g' &&
9343                   name[4] == 't' &&
9344                   name[5] == 'h')
9345               {                                   /* length     */
9346                 return -KEY_length;
9347               }
9348
9349               goto unknown;
9350
9351             case 'i':
9352               if (name[2] == 's' &&
9353                   name[3] == 't' &&
9354                   name[4] == 'e' &&
9355                   name[5] == 'n')
9356               {                                   /* listen     */
9357                 return -KEY_listen;
9358               }
9359
9360               goto unknown;
9361
9362             default:
9363               goto unknown;
9364           }
9365
9366         case 'm':
9367           if (name[1] == 's' &&
9368               name[2] == 'g')
9369           {
9370             switch (name[3])
9371             {
9372               case 'c':
9373                 if (name[4] == 't' &&
9374                     name[5] == 'l')
9375                 {                                 /* msgctl     */
9376                   return -KEY_msgctl;
9377                 }
9378
9379                 goto unknown;
9380
9381               case 'g':
9382                 if (name[4] == 'e' &&
9383                     name[5] == 't')
9384                 {                                 /* msgget     */
9385                   return -KEY_msgget;
9386                 }
9387
9388                 goto unknown;
9389
9390               case 'r':
9391                 if (name[4] == 'c' &&
9392                     name[5] == 'v')
9393                 {                                 /* msgrcv     */
9394                   return -KEY_msgrcv;
9395                 }
9396
9397                 goto unknown;
9398
9399               case 's':
9400                 if (name[4] == 'n' &&
9401                     name[5] == 'd')
9402                 {                                 /* msgsnd     */
9403                   return -KEY_msgsnd;
9404                 }
9405
9406                 goto unknown;
9407
9408               default:
9409                 goto unknown;
9410             }
9411           }
9412
9413           goto unknown;
9414
9415         case 'p':
9416           if (name[1] == 'r' &&
9417               name[2] == 'i' &&
9418               name[3] == 'n' &&
9419               name[4] == 't' &&
9420               name[5] == 'f')
9421           {                                       /* printf     */
9422             return KEY_printf;
9423           }
9424
9425           goto unknown;
9426
9427         case 'r':
9428           switch (name[1])
9429           {
9430             case 'e':
9431               switch (name[2])
9432               {
9433                 case 'n':
9434                   if (name[3] == 'a' &&
9435                       name[4] == 'm' &&
9436                       name[5] == 'e')
9437                   {                               /* rename     */
9438                     return -KEY_rename;
9439                   }
9440
9441                   goto unknown;
9442
9443                 case 't':
9444                   if (name[3] == 'u' &&
9445                       name[4] == 'r' &&
9446                       name[5] == 'n')
9447                   {                               /* return     */
9448                     return KEY_return;
9449                   }
9450
9451                   goto unknown;
9452
9453                 default:
9454                   goto unknown;
9455               }
9456
9457             case 'i':
9458               if (name[2] == 'n' &&
9459                   name[3] == 'd' &&
9460                   name[4] == 'e' &&
9461                   name[5] == 'x')
9462               {                                   /* rindex     */
9463                 return -KEY_rindex;
9464               }
9465
9466               goto unknown;
9467
9468             default:
9469               goto unknown;
9470           }
9471
9472         case 's':
9473           switch (name[1])
9474           {
9475             case 'c':
9476               if (name[2] == 'a' &&
9477                   name[3] == 'l' &&
9478                   name[4] == 'a' &&
9479                   name[5] == 'r')
9480               {                                   /* scalar     */
9481                 return KEY_scalar;
9482               }
9483
9484               goto unknown;
9485
9486             case 'e':
9487               switch (name[2])
9488               {
9489                 case 'l':
9490                   if (name[3] == 'e' &&
9491                       name[4] == 'c' &&
9492                       name[5] == 't')
9493                   {                               /* select     */
9494                     return -KEY_select;
9495                   }
9496
9497                   goto unknown;
9498
9499                 case 'm':
9500                   switch (name[3])
9501                   {
9502                     case 'c':
9503                       if (name[4] == 't' &&
9504                           name[5] == 'l')
9505                       {                           /* semctl     */
9506                         return -KEY_semctl;
9507                       }
9508
9509                       goto unknown;
9510
9511                     case 'g':
9512                       if (name[4] == 'e' &&
9513                           name[5] == 't')
9514                       {                           /* semget     */
9515                         return -KEY_semget;
9516                       }
9517
9518                       goto unknown;
9519
9520                     default:
9521                       goto unknown;
9522                   }
9523
9524                 default:
9525                   goto unknown;
9526               }
9527
9528             case 'h':
9529               if (name[2] == 'm')
9530               {
9531                 switch (name[3])
9532                 {
9533                   case 'c':
9534                     if (name[4] == 't' &&
9535                         name[5] == 'l')
9536                     {                             /* shmctl     */
9537                       return -KEY_shmctl;
9538                     }
9539
9540                     goto unknown;
9541
9542                   case 'g':
9543                     if (name[4] == 'e' &&
9544                         name[5] == 't')
9545                     {                             /* shmget     */
9546                       return -KEY_shmget;
9547                     }
9548
9549                     goto unknown;
9550
9551                   default:
9552                     goto unknown;
9553                 }
9554               }
9555
9556               goto unknown;
9557
9558             case 'o':
9559               if (name[2] == 'c' &&
9560                   name[3] == 'k' &&
9561                   name[4] == 'e' &&
9562                   name[5] == 't')
9563               {                                   /* socket     */
9564                 return -KEY_socket;
9565               }
9566
9567               goto unknown;
9568
9569             case 'p':
9570               if (name[2] == 'l' &&
9571                   name[3] == 'i' &&
9572                   name[4] == 'c' &&
9573                   name[5] == 'e')
9574               {                                   /* splice     */
9575                 return -KEY_splice;
9576               }
9577
9578               goto unknown;
9579
9580             case 'u':
9581               if (name[2] == 'b' &&
9582                   name[3] == 's' &&
9583                   name[4] == 't' &&
9584                   name[5] == 'r')
9585               {                                   /* substr     */
9586                 return -KEY_substr;
9587               }
9588
9589               goto unknown;
9590
9591             case 'y':
9592               if (name[2] == 's' &&
9593                   name[3] == 't' &&
9594                   name[4] == 'e' &&
9595                   name[5] == 'm')
9596               {                                   /* system     */
9597                 return -KEY_system;
9598               }
9599
9600               goto unknown;
9601
9602             default:
9603               goto unknown;
9604           }
9605
9606         case 'u':
9607           if (name[1] == 'n')
9608           {
9609             switch (name[2])
9610             {
9611               case 'l':
9612                 switch (name[3])
9613                 {
9614                   case 'e':
9615                     if (name[4] == 's' &&
9616                         name[5] == 's')
9617                     {                             /* unless     */
9618                       return KEY_unless;
9619                     }
9620
9621                     goto unknown;
9622
9623                   case 'i':
9624                     if (name[4] == 'n' &&
9625                         name[5] == 'k')
9626                     {                             /* unlink     */
9627                       return -KEY_unlink;
9628                     }
9629
9630                     goto unknown;
9631
9632                   default:
9633                     goto unknown;
9634                 }
9635
9636               case 'p':
9637                 if (name[3] == 'a' &&
9638                     name[4] == 'c' &&
9639                     name[5] == 'k')
9640                 {                                 /* unpack     */
9641                   return -KEY_unpack;
9642                 }
9643
9644                 goto unknown;
9645
9646               default:
9647                 goto unknown;
9648             }
9649           }
9650
9651           goto unknown;
9652
9653         case 'v':
9654           if (name[1] == 'a' &&
9655               name[2] == 'l' &&
9656               name[3] == 'u' &&
9657               name[4] == 'e' &&
9658               name[5] == 's')
9659           {                                       /* values     */
9660             return -KEY_values;
9661           }
9662
9663           goto unknown;
9664
9665         default:
9666           goto unknown;
9667       }
9668
9669     case 7: /* 29 tokens of length 7 */
9670       switch (name[0])
9671       {
9672         case 'D':
9673           if (name[1] == 'E' &&
9674               name[2] == 'S' &&
9675               name[3] == 'T' &&
9676               name[4] == 'R' &&
9677               name[5] == 'O' &&
9678               name[6] == 'Y')
9679           {                                       /* DESTROY    */
9680             return KEY_DESTROY;
9681           }
9682
9683           goto unknown;
9684
9685         case '_':
9686           if (name[1] == '_' &&
9687               name[2] == 'E' &&
9688               name[3] == 'N' &&
9689               name[4] == 'D' &&
9690               name[5] == '_' &&
9691               name[6] == '_')
9692           {                                       /* __END__    */
9693             return KEY___END__;
9694           }
9695
9696           goto unknown;
9697
9698         case 'b':
9699           if (name[1] == 'i' &&
9700               name[2] == 'n' &&
9701               name[3] == 'm' &&
9702               name[4] == 'o' &&
9703               name[5] == 'd' &&
9704               name[6] == 'e')
9705           {                                       /* binmode    */
9706             return -KEY_binmode;
9707           }
9708
9709           goto unknown;
9710
9711         case 'c':
9712           if (name[1] == 'o' &&
9713               name[2] == 'n' &&
9714               name[3] == 'n' &&
9715               name[4] == 'e' &&
9716               name[5] == 'c' &&
9717               name[6] == 't')
9718           {                                       /* connect    */
9719             return -KEY_connect;
9720           }
9721
9722           goto unknown;
9723
9724         case 'd':
9725           switch (name[1])
9726           {
9727             case 'b':
9728               if (name[2] == 'm' &&
9729                   name[3] == 'o' &&
9730                   name[4] == 'p' &&
9731                   name[5] == 'e' &&
9732                   name[6] == 'n')
9733               {                                   /* dbmopen    */
9734                 return -KEY_dbmopen;
9735               }
9736
9737               goto unknown;
9738
9739             case 'e':
9740               if (name[2] == 'f')
9741               {
9742                 switch (name[3])
9743                 {
9744                   case 'a':
9745                     if (name[4] == 'u' &&
9746                         name[5] == 'l' &&
9747                         name[6] == 't')
9748                     {                             /* default    */
9749                       return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_default : 0);
9750                     }
9751
9752                     goto unknown;
9753
9754                   case 'i':
9755                     if (name[4] == 'n' &&
9756                         name[5] == 'e' &&
9757                         name[6] == 'd')
9758                     {                             /* defined    */
9759                       return KEY_defined;
9760                     }
9761
9762                     goto unknown;
9763
9764                   default:
9765                     goto unknown;
9766                 }
9767               }
9768
9769               goto unknown;
9770
9771             default:
9772               goto unknown;
9773           }
9774
9775         case 'f':
9776           if (name[1] == 'o' &&
9777               name[2] == 'r' &&
9778               name[3] == 'e' &&
9779               name[4] == 'a' &&
9780               name[5] == 'c' &&
9781               name[6] == 'h')
9782           {                                       /* foreach    */
9783             return KEY_foreach;
9784           }
9785
9786           goto unknown;
9787
9788         case 'g':
9789           if (name[1] == 'e' &&
9790               name[2] == 't' &&
9791               name[3] == 'p')
9792           {
9793             switch (name[4])
9794             {
9795               case 'g':
9796                 if (name[5] == 'r' &&
9797                     name[6] == 'p')
9798                 {                                 /* getpgrp    */
9799                   return -KEY_getpgrp;
9800                 }
9801
9802                 goto unknown;
9803
9804               case 'p':
9805                 if (name[5] == 'i' &&
9806                     name[6] == 'd')
9807                 {                                 /* getppid    */
9808                   return -KEY_getppid;
9809                 }
9810
9811                 goto unknown;
9812
9813               default:
9814                 goto unknown;
9815             }
9816           }
9817
9818           goto unknown;
9819
9820         case 'l':
9821           if (name[1] == 'c' &&
9822               name[2] == 'f' &&
9823               name[3] == 'i' &&
9824               name[4] == 'r' &&
9825               name[5] == 's' &&
9826               name[6] == 't')
9827           {                                       /* lcfirst    */
9828             return -KEY_lcfirst;
9829           }
9830
9831           goto unknown;
9832
9833         case 'o':
9834           if (name[1] == 'p' &&
9835               name[2] == 'e' &&
9836               name[3] == 'n' &&
9837               name[4] == 'd' &&
9838               name[5] == 'i' &&
9839               name[6] == 'r')
9840           {                                       /* opendir    */
9841             return -KEY_opendir;
9842           }
9843
9844           goto unknown;
9845
9846         case 'p':
9847           if (name[1] == 'a' &&
9848               name[2] == 'c' &&
9849               name[3] == 'k' &&
9850               name[4] == 'a' &&
9851               name[5] == 'g' &&
9852               name[6] == 'e')
9853           {                                       /* package    */
9854             return KEY_package;
9855           }
9856
9857           goto unknown;
9858
9859         case 'r':
9860           if (name[1] == 'e')
9861           {
9862             switch (name[2])
9863             {
9864               case 'a':
9865                 if (name[3] == 'd' &&
9866                     name[4] == 'd' &&
9867                     name[5] == 'i' &&
9868                     name[6] == 'r')
9869                 {                                 /* readdir    */
9870                   return -KEY_readdir;
9871                 }
9872
9873                 goto unknown;
9874
9875               case 'q':
9876                 if (name[3] == 'u' &&
9877                     name[4] == 'i' &&
9878                     name[5] == 'r' &&
9879                     name[6] == 'e')
9880                 {                                 /* require    */
9881                   return KEY_require;
9882                 }
9883
9884                 goto unknown;
9885
9886               case 'v':
9887                 if (name[3] == 'e' &&
9888                     name[4] == 'r' &&
9889                     name[5] == 's' &&
9890                     name[6] == 'e')
9891                 {                                 /* reverse    */
9892                   return -KEY_reverse;
9893                 }
9894
9895                 goto unknown;
9896
9897               default:
9898                 goto unknown;
9899             }
9900           }
9901
9902           goto unknown;
9903
9904         case 's':
9905           switch (name[1])
9906           {
9907             case 'e':
9908               switch (name[2])
9909               {
9910                 case 'e':
9911                   if (name[3] == 'k' &&
9912                       name[4] == 'd' &&
9913                       name[5] == 'i' &&
9914                       name[6] == 'r')
9915                   {                               /* seekdir    */
9916                     return -KEY_seekdir;
9917                   }
9918
9919                   goto unknown;
9920
9921                 case 't':
9922                   if (name[3] == 'p' &&
9923                       name[4] == 'g' &&
9924                       name[5] == 'r' &&
9925                       name[6] == 'p')
9926                   {                               /* setpgrp    */
9927                     return -KEY_setpgrp;
9928                   }
9929
9930                   goto unknown;
9931
9932                 default:
9933                   goto unknown;
9934               }
9935
9936             case 'h':
9937               if (name[2] == 'm' &&
9938                   name[3] == 'r' &&
9939                   name[4] == 'e' &&
9940                   name[5] == 'a' &&
9941                   name[6] == 'd')
9942               {                                   /* shmread    */
9943                 return -KEY_shmread;
9944               }
9945
9946               goto unknown;
9947
9948             case 'p':
9949               if (name[2] == 'r' &&
9950                   name[3] == 'i' &&
9951                   name[4] == 'n' &&
9952                   name[5] == 't' &&
9953                   name[6] == 'f')
9954               {                                   /* sprintf    */
9955                 return -KEY_sprintf;
9956               }
9957
9958               goto unknown;
9959
9960             case 'y':
9961               switch (name[2])
9962               {
9963                 case 'm':
9964                   if (name[3] == 'l' &&
9965                       name[4] == 'i' &&
9966                       name[5] == 'n' &&
9967                       name[6] == 'k')
9968                   {                               /* symlink    */
9969                     return -KEY_symlink;
9970                   }
9971
9972                   goto unknown;
9973
9974                 case 's':
9975                   switch (name[3])
9976                   {
9977                     case 'c':
9978                       if (name[4] == 'a' &&
9979                           name[5] == 'l' &&
9980                           name[6] == 'l')
9981                       {                           /* syscall    */
9982                         return -KEY_syscall;
9983                       }
9984
9985                       goto unknown;
9986
9987                     case 'o':
9988                       if (name[4] == 'p' &&
9989                           name[5] == 'e' &&
9990                           name[6] == 'n')
9991                       {                           /* sysopen    */
9992                         return -KEY_sysopen;
9993                       }
9994
9995                       goto unknown;
9996
9997                     case 'r':
9998                       if (name[4] == 'e' &&
9999                           name[5] == 'a' &&
10000                           name[6] == 'd')
10001                       {                           /* sysread    */
10002                         return -KEY_sysread;
10003                       }
10004
10005                       goto unknown;
10006
10007                     case 's':
10008                       if (name[4] == 'e' &&
10009                           name[5] == 'e' &&
10010                           name[6] == 'k')
10011                       {                           /* sysseek    */
10012                         return -KEY_sysseek;
10013                       }
10014
10015                       goto unknown;
10016
10017                     default:
10018                       goto unknown;
10019                   }
10020
10021                 default:
10022                   goto unknown;
10023               }
10024
10025             default:
10026               goto unknown;
10027           }
10028
10029         case 't':
10030           if (name[1] == 'e' &&
10031               name[2] == 'l' &&
10032               name[3] == 'l' &&
10033               name[4] == 'd' &&
10034               name[5] == 'i' &&
10035               name[6] == 'r')
10036           {                                       /* telldir    */
10037             return -KEY_telldir;
10038           }
10039
10040           goto unknown;
10041
10042         case 'u':
10043           switch (name[1])
10044           {
10045             case 'c':
10046               if (name[2] == 'f' &&
10047                   name[3] == 'i' &&
10048                   name[4] == 'r' &&
10049                   name[5] == 's' &&
10050                   name[6] == 't')
10051               {                                   /* ucfirst    */
10052                 return -KEY_ucfirst;
10053               }
10054
10055               goto unknown;
10056
10057             case 'n':
10058               if (name[2] == 's' &&
10059                   name[3] == 'h' &&
10060                   name[4] == 'i' &&
10061                   name[5] == 'f' &&
10062                   name[6] == 't')
10063               {                                   /* unshift    */
10064                 return -KEY_unshift;
10065               }
10066
10067               goto unknown;
10068
10069             default:
10070               goto unknown;
10071           }
10072
10073         case 'w':
10074           if (name[1] == 'a' &&
10075               name[2] == 'i' &&
10076               name[3] == 't' &&
10077               name[4] == 'p' &&
10078               name[5] == 'i' &&
10079               name[6] == 'd')
10080           {                                       /* waitpid    */
10081             return -KEY_waitpid;
10082           }
10083
10084           goto unknown;
10085
10086         default:
10087           goto unknown;
10088       }
10089
10090     case 8: /* 26 tokens of length 8 */
10091       switch (name[0])
10092       {
10093         case 'A':
10094           if (name[1] == 'U' &&
10095               name[2] == 'T' &&
10096               name[3] == 'O' &&
10097               name[4] == 'L' &&
10098               name[5] == 'O' &&
10099               name[6] == 'A' &&
10100               name[7] == 'D')
10101           {                                       /* AUTOLOAD   */
10102             return KEY_AUTOLOAD;
10103           }
10104
10105           goto unknown;
10106
10107         case '_':
10108           if (name[1] == '_')
10109           {
10110             switch (name[2])
10111             {
10112               case 'D':
10113                 if (name[3] == 'A' &&
10114                     name[4] == 'T' &&
10115                     name[5] == 'A' &&
10116                     name[6] == '_' &&
10117                     name[7] == '_')
10118                 {                                 /* __DATA__   */
10119                   return KEY___DATA__;
10120                 }
10121
10122                 goto unknown;
10123
10124               case 'F':
10125                 if (name[3] == 'I' &&
10126                     name[4] == 'L' &&
10127                     name[5] == 'E' &&
10128                     name[6] == '_' &&
10129                     name[7] == '_')
10130                 {                                 /* __FILE__   */
10131                   return -KEY___FILE__;
10132                 }
10133
10134                 goto unknown;
10135
10136               case 'L':
10137                 if (name[3] == 'I' &&
10138                     name[4] == 'N' &&
10139                     name[5] == 'E' &&
10140                     name[6] == '_' &&
10141                     name[7] == '_')
10142                 {                                 /* __LINE__   */
10143                   return -KEY___LINE__;
10144                 }
10145
10146                 goto unknown;
10147
10148               default:
10149                 goto unknown;
10150             }
10151           }
10152
10153           goto unknown;
10154
10155         case 'c':
10156           switch (name[1])
10157           {
10158             case 'l':
10159               if (name[2] == 'o' &&
10160                   name[3] == 's' &&
10161                   name[4] == 'e' &&
10162                   name[5] == 'd' &&
10163                   name[6] == 'i' &&
10164                   name[7] == 'r')
10165               {                                   /* closedir   */
10166                 return -KEY_closedir;
10167               }
10168
10169               goto unknown;
10170
10171             case 'o':
10172               if (name[2] == 'n' &&
10173                   name[3] == 't' &&
10174                   name[4] == 'i' &&
10175                   name[5] == 'n' &&
10176                   name[6] == 'u' &&
10177                   name[7] == 'e')
10178               {                                   /* continue   */
10179                 return -KEY_continue;
10180               }
10181
10182               goto unknown;
10183
10184             default:
10185               goto unknown;
10186           }
10187
10188         case 'd':
10189           if (name[1] == 'b' &&
10190               name[2] == 'm' &&
10191               name[3] == 'c' &&
10192               name[4] == 'l' &&
10193               name[5] == 'o' &&
10194               name[6] == 's' &&
10195               name[7] == 'e')
10196           {                                       /* dbmclose   */
10197             return -KEY_dbmclose;
10198           }
10199
10200           goto unknown;
10201
10202         case 'e':
10203           if (name[1] == 'n' &&
10204               name[2] == 'd')
10205           {
10206             switch (name[3])
10207             {
10208               case 'g':
10209                 if (name[4] == 'r' &&
10210                     name[5] == 'e' &&
10211                     name[6] == 'n' &&
10212                     name[7] == 't')
10213                 {                                 /* endgrent   */
10214                   return -KEY_endgrent;
10215                 }
10216
10217                 goto unknown;
10218
10219               case 'p':
10220                 if (name[4] == 'w' &&
10221                     name[5] == 'e' &&
10222                     name[6] == 'n' &&
10223                     name[7] == 't')
10224                 {                                 /* endpwent   */
10225                   return -KEY_endpwent;
10226                 }
10227
10228                 goto unknown;
10229
10230               default:
10231                 goto unknown;
10232             }
10233           }
10234
10235           goto unknown;
10236
10237         case 'f':
10238           if (name[1] == 'o' &&
10239               name[2] == 'r' &&
10240               name[3] == 'm' &&
10241               name[4] == 'l' &&
10242               name[5] == 'i' &&
10243               name[6] == 'n' &&
10244               name[7] == 'e')
10245           {                                       /* formline   */
10246             return -KEY_formline;
10247           }
10248
10249           goto unknown;
10250
10251         case 'g':
10252           if (name[1] == 'e' &&
10253               name[2] == 't')
10254           {
10255             switch (name[3])
10256             {
10257               case 'g':
10258                 if (name[4] == 'r')
10259                 {
10260                   switch (name[5])
10261                   {
10262                     case 'e':
10263                       if (name[6] == 'n' &&
10264                           name[7] == 't')
10265                       {                           /* getgrent   */
10266                         return -KEY_getgrent;
10267                       }
10268
10269                       goto unknown;
10270
10271                     case 'g':
10272                       if (name[6] == 'i' &&
10273                           name[7] == 'd')
10274                       {                           /* getgrgid   */
10275                         return -KEY_getgrgid;
10276                       }
10277
10278                       goto unknown;
10279
10280                     case 'n':
10281                       if (name[6] == 'a' &&
10282                           name[7] == 'm')
10283                       {                           /* getgrnam   */
10284                         return -KEY_getgrnam;
10285                       }
10286
10287                       goto unknown;
10288
10289                     default:
10290                       goto unknown;
10291                   }
10292                 }
10293
10294                 goto unknown;
10295
10296               case 'l':
10297                 if (name[4] == 'o' &&
10298                     name[5] == 'g' &&
10299                     name[6] == 'i' &&
10300                     name[7] == 'n')
10301                 {                                 /* getlogin   */
10302                   return -KEY_getlogin;
10303                 }
10304
10305                 goto unknown;
10306
10307               case 'p':
10308                 if (name[4] == 'w')
10309                 {
10310                   switch (name[5])
10311                   {
10312                     case 'e':
10313                       if (name[6] == 'n' &&
10314                           name[7] == 't')
10315                       {                           /* getpwent   */
10316                         return -KEY_getpwent;
10317                       }
10318
10319                       goto unknown;
10320
10321                     case 'n':
10322                       if (name[6] == 'a' &&
10323                           name[7] == 'm')
10324                       {                           /* getpwnam   */
10325                         return -KEY_getpwnam;
10326                       }
10327
10328                       goto unknown;
10329
10330                     case 'u':
10331                       if (name[6] == 'i' &&
10332                           name[7] == 'd')
10333                       {                           /* getpwuid   */
10334                         return -KEY_getpwuid;
10335                       }
10336
10337                       goto unknown;
10338
10339                     default:
10340                       goto unknown;
10341                   }
10342                 }
10343
10344                 goto unknown;
10345
10346               default:
10347                 goto unknown;
10348             }
10349           }
10350
10351           goto unknown;
10352
10353         case 'r':
10354           if (name[1] == 'e' &&
10355               name[2] == 'a' &&
10356               name[3] == 'd')
10357           {
10358             switch (name[4])
10359             {
10360               case 'l':
10361                 if (name[5] == 'i' &&
10362                     name[6] == 'n')
10363                 {
10364                   switch (name[7])
10365                   {
10366                     case 'e':
10367                       {                           /* readline   */
10368                         return -KEY_readline;
10369                       }
10370
10371                     case 'k':
10372                       {                           /* readlink   */
10373                         return -KEY_readlink;
10374                       }
10375
10376                     default:
10377                       goto unknown;
10378                   }
10379                 }
10380
10381                 goto unknown;
10382
10383               case 'p':
10384                 if (name[5] == 'i' &&
10385                     name[6] == 'p' &&
10386                     name[7] == 'e')
10387                 {                                 /* readpipe   */
10388                   return -KEY_readpipe;
10389                 }
10390
10391                 goto unknown;
10392
10393               default:
10394                 goto unknown;
10395             }
10396           }
10397
10398           goto unknown;
10399
10400         case 's':
10401           switch (name[1])
10402           {
10403             case 'e':
10404               if (name[2] == 't')
10405               {
10406                 switch (name[3])
10407                 {
10408                   case 'g':
10409                     if (name[4] == 'r' &&
10410                         name[5] == 'e' &&
10411                         name[6] == 'n' &&
10412                         name[7] == 't')
10413                     {                             /* setgrent   */
10414                       return -KEY_setgrent;
10415                     }
10416
10417                     goto unknown;
10418
10419                   case 'p':
10420                     if (name[4] == 'w' &&
10421                         name[5] == 'e' &&
10422                         name[6] == 'n' &&
10423                         name[7] == 't')
10424                     {                             /* setpwent   */
10425                       return -KEY_setpwent;
10426                     }
10427
10428                     goto unknown;
10429
10430                   default:
10431                     goto unknown;
10432                 }
10433               }
10434
10435               goto unknown;
10436
10437             case 'h':
10438               switch (name[2])
10439               {
10440                 case 'm':
10441                   if (name[3] == 'w' &&
10442                       name[4] == 'r' &&
10443                       name[5] == 'i' &&
10444                       name[6] == 't' &&
10445                       name[7] == 'e')
10446                   {                               /* shmwrite   */
10447                     return -KEY_shmwrite;
10448                   }
10449
10450                   goto unknown;
10451
10452                 case 'u':
10453                   if (name[3] == 't' &&
10454                       name[4] == 'd' &&
10455                       name[5] == 'o' &&
10456                       name[6] == 'w' &&
10457                       name[7] == 'n')
10458                   {                               /* shutdown   */
10459                     return -KEY_shutdown;
10460                   }
10461
10462                   goto unknown;
10463
10464                 default:
10465                   goto unknown;
10466               }
10467
10468             case 'y':
10469               if (name[2] == 's' &&
10470                   name[3] == 'w' &&
10471                   name[4] == 'r' &&
10472                   name[5] == 'i' &&
10473                   name[6] == 't' &&
10474                   name[7] == 'e')
10475               {                                   /* syswrite   */
10476                 return -KEY_syswrite;
10477               }
10478
10479               goto unknown;
10480
10481             default:
10482               goto unknown;
10483           }
10484
10485         case 't':
10486           if (name[1] == 'r' &&
10487               name[2] == 'u' &&
10488               name[3] == 'n' &&
10489               name[4] == 'c' &&
10490               name[5] == 'a' &&
10491               name[6] == 't' &&
10492               name[7] == 'e')
10493           {                                       /* truncate   */
10494             return -KEY_truncate;
10495           }
10496
10497           goto unknown;
10498
10499         default:
10500           goto unknown;
10501       }
10502
10503     case 9: /* 9 tokens of length 9 */
10504       switch (name[0])
10505       {
10506         case 'U':
10507           if (name[1] == 'N' &&
10508               name[2] == 'I' &&
10509               name[3] == 'T' &&
10510               name[4] == 'C' &&
10511               name[5] == 'H' &&
10512               name[6] == 'E' &&
10513               name[7] == 'C' &&
10514               name[8] == 'K')
10515           {                                       /* UNITCHECK  */
10516             return KEY_UNITCHECK;
10517           }
10518
10519           goto unknown;
10520
10521         case 'e':
10522           if (name[1] == 'n' &&
10523               name[2] == 'd' &&
10524               name[3] == 'n' &&
10525               name[4] == 'e' &&
10526               name[5] == 't' &&
10527               name[6] == 'e' &&
10528               name[7] == 'n' &&
10529               name[8] == 't')
10530           {                                       /* endnetent  */
10531             return -KEY_endnetent;
10532           }
10533
10534           goto unknown;
10535
10536         case 'g':
10537           if (name[1] == 'e' &&
10538               name[2] == 't' &&
10539               name[3] == 'n' &&
10540               name[4] == 'e' &&
10541               name[5] == 't' &&
10542               name[6] == 'e' &&
10543               name[7] == 'n' &&
10544               name[8] == 't')
10545           {                                       /* getnetent  */
10546             return -KEY_getnetent;
10547           }
10548
10549           goto unknown;
10550
10551         case 'l':
10552           if (name[1] == 'o' &&
10553               name[2] == 'c' &&
10554               name[3] == 'a' &&
10555               name[4] == 'l' &&
10556               name[5] == 't' &&
10557               name[6] == 'i' &&
10558               name[7] == 'm' &&
10559               name[8] == 'e')
10560           {                                       /* localtime  */
10561             return -KEY_localtime;
10562           }
10563
10564           goto unknown;
10565
10566         case 'p':
10567           if (name[1] == 'r' &&
10568               name[2] == 'o' &&
10569               name[3] == 't' &&
10570               name[4] == 'o' &&
10571               name[5] == 't' &&
10572               name[6] == 'y' &&
10573               name[7] == 'p' &&
10574               name[8] == 'e')
10575           {                                       /* prototype  */
10576             return KEY_prototype;
10577           }
10578
10579           goto unknown;
10580
10581         case 'q':
10582           if (name[1] == 'u' &&
10583               name[2] == 'o' &&
10584               name[3] == 't' &&
10585               name[4] == 'e' &&
10586               name[5] == 'm' &&
10587               name[6] == 'e' &&
10588               name[7] == 't' &&
10589               name[8] == 'a')
10590           {                                       /* quotemeta  */
10591             return -KEY_quotemeta;
10592           }
10593
10594           goto unknown;
10595
10596         case 'r':
10597           if (name[1] == 'e' &&
10598               name[2] == 'w' &&
10599               name[3] == 'i' &&
10600               name[4] == 'n' &&
10601               name[5] == 'd' &&
10602               name[6] == 'd' &&
10603               name[7] == 'i' &&
10604               name[8] == 'r')
10605           {                                       /* rewinddir  */
10606             return -KEY_rewinddir;
10607           }
10608
10609           goto unknown;
10610
10611         case 's':
10612           if (name[1] == 'e' &&
10613               name[2] == 't' &&
10614               name[3] == 'n' &&
10615               name[4] == 'e' &&
10616               name[5] == 't' &&
10617               name[6] == 'e' &&
10618               name[7] == 'n' &&
10619               name[8] == 't')
10620           {                                       /* setnetent  */
10621             return -KEY_setnetent;
10622           }
10623
10624           goto unknown;
10625
10626         case 'w':
10627           if (name[1] == 'a' &&
10628               name[2] == 'n' &&
10629               name[3] == 't' &&
10630               name[4] == 'a' &&
10631               name[5] == 'r' &&
10632               name[6] == 'r' &&
10633               name[7] == 'a' &&
10634               name[8] == 'y')
10635           {                                       /* wantarray  */
10636             return -KEY_wantarray;
10637           }
10638
10639           goto unknown;
10640
10641         default:
10642           goto unknown;
10643       }
10644
10645     case 10: /* 9 tokens of length 10 */
10646       switch (name[0])
10647       {
10648         case 'e':
10649           if (name[1] == 'n' &&
10650               name[2] == 'd')
10651           {
10652             switch (name[3])
10653             {
10654               case 'h':
10655                 if (name[4] == 'o' &&
10656                     name[5] == 's' &&
10657                     name[6] == 't' &&
10658                     name[7] == 'e' &&
10659                     name[8] == 'n' &&
10660                     name[9] == 't')
10661                 {                                 /* endhostent */
10662                   return -KEY_endhostent;
10663                 }
10664
10665                 goto unknown;
10666
10667               case 's':
10668                 if (name[4] == 'e' &&
10669                     name[5] == 'r' &&
10670                     name[6] == 'v' &&
10671                     name[7] == 'e' &&
10672                     name[8] == 'n' &&
10673                     name[9] == 't')
10674                 {                                 /* endservent */
10675                   return -KEY_endservent;
10676                 }
10677
10678                 goto unknown;
10679
10680               default:
10681                 goto unknown;
10682             }
10683           }
10684
10685           goto unknown;
10686
10687         case 'g':
10688           if (name[1] == 'e' &&
10689               name[2] == 't')
10690           {
10691             switch (name[3])
10692             {
10693               case 'h':
10694                 if (name[4] == 'o' &&
10695                     name[5] == 's' &&
10696                     name[6] == 't' &&
10697                     name[7] == 'e' &&
10698                     name[8] == 'n' &&
10699                     name[9] == 't')
10700                 {                                 /* gethostent */
10701                   return -KEY_gethostent;
10702                 }
10703
10704                 goto unknown;
10705
10706               case 's':
10707                 switch (name[4])
10708                 {
10709                   case 'e':
10710                     if (name[5] == 'r' &&
10711                         name[6] == 'v' &&
10712                         name[7] == 'e' &&
10713                         name[8] == 'n' &&
10714                         name[9] == 't')
10715                     {                             /* getservent */
10716                       return -KEY_getservent;
10717                     }
10718
10719                     goto unknown;
10720
10721                   case 'o':
10722                     if (name[5] == 'c' &&
10723                         name[6] == 'k' &&
10724                         name[7] == 'o' &&
10725                         name[8] == 'p' &&
10726                         name[9] == 't')
10727                     {                             /* getsockopt */
10728                       return -KEY_getsockopt;
10729                     }
10730
10731                     goto unknown;
10732
10733                   default:
10734                     goto unknown;
10735                 }
10736
10737               default:
10738                 goto unknown;
10739             }
10740           }
10741
10742           goto unknown;
10743
10744         case 's':
10745           switch (name[1])
10746           {
10747             case 'e':
10748               if (name[2] == 't')
10749               {
10750                 switch (name[3])
10751                 {
10752                   case 'h':
10753                     if (name[4] == 'o' &&
10754                         name[5] == 's' &&
10755                         name[6] == 't' &&
10756                         name[7] == 'e' &&
10757                         name[8] == 'n' &&
10758                         name[9] == 't')
10759                     {                             /* sethostent */
10760                       return -KEY_sethostent;
10761                     }
10762
10763                     goto unknown;
10764
10765                   case 's':
10766                     switch (name[4])
10767                     {
10768                       case 'e':
10769                         if (name[5] == 'r' &&
10770                             name[6] == 'v' &&
10771                             name[7] == 'e' &&
10772                             name[8] == 'n' &&
10773                             name[9] == 't')
10774                         {                         /* setservent */
10775                           return -KEY_setservent;
10776                         }
10777
10778                         goto unknown;
10779
10780                       case 'o':
10781                         if (name[5] == 'c' &&
10782                             name[6] == 'k' &&
10783                             name[7] == 'o' &&
10784                             name[8] == 'p' &&
10785                             name[9] == 't')
10786                         {                         /* setsockopt */
10787                           return -KEY_setsockopt;
10788                         }
10789
10790                         goto unknown;
10791
10792                       default:
10793                         goto unknown;
10794                     }
10795
10796                   default:
10797                     goto unknown;
10798                 }
10799               }
10800
10801               goto unknown;
10802
10803             case 'o':
10804               if (name[2] == 'c' &&
10805                   name[3] == 'k' &&
10806                   name[4] == 'e' &&
10807                   name[5] == 't' &&
10808                   name[6] == 'p' &&
10809                   name[7] == 'a' &&
10810                   name[8] == 'i' &&
10811                   name[9] == 'r')
10812               {                                   /* socketpair */
10813                 return -KEY_socketpair;
10814               }
10815
10816               goto unknown;
10817
10818             default:
10819               goto unknown;
10820           }
10821
10822         default:
10823           goto unknown;
10824       }
10825
10826     case 11: /* 8 tokens of length 11 */
10827       switch (name[0])
10828       {
10829         case '_':
10830           if (name[1] == '_' &&
10831               name[2] == 'P' &&
10832               name[3] == 'A' &&
10833               name[4] == 'C' &&
10834               name[5] == 'K' &&
10835               name[6] == 'A' &&
10836               name[7] == 'G' &&
10837               name[8] == 'E' &&
10838               name[9] == '_' &&
10839               name[10] == '_')
10840           {                                       /* __PACKAGE__ */
10841             return -KEY___PACKAGE__;
10842           }
10843
10844           goto unknown;
10845
10846         case 'e':
10847           if (name[1] == 'n' &&
10848               name[2] == 'd' &&
10849               name[3] == 'p' &&
10850               name[4] == 'r' &&
10851               name[5] == 'o' &&
10852               name[6] == 't' &&
10853               name[7] == 'o' &&
10854               name[8] == 'e' &&
10855               name[9] == 'n' &&
10856               name[10] == 't')
10857           {                                       /* endprotoent */
10858             return -KEY_endprotoent;
10859           }
10860
10861           goto unknown;
10862
10863         case 'g':
10864           if (name[1] == 'e' &&
10865               name[2] == 't')
10866           {
10867             switch (name[3])
10868             {
10869               case 'p':
10870                 switch (name[4])
10871                 {
10872                   case 'e':
10873                     if (name[5] == 'e' &&
10874                         name[6] == 'r' &&
10875                         name[7] == 'n' &&
10876                         name[8] == 'a' &&
10877                         name[9] == 'm' &&
10878                         name[10] == 'e')
10879                     {                             /* getpeername */
10880                       return -KEY_getpeername;
10881                     }
10882
10883                     goto unknown;
10884
10885                   case 'r':
10886                     switch (name[5])
10887                     {
10888                       case 'i':
10889                         if (name[6] == 'o' &&
10890                             name[7] == 'r' &&
10891                             name[8] == 'i' &&
10892                             name[9] == 't' &&
10893                             name[10] == 'y')
10894                         {                         /* getpriority */
10895                           return -KEY_getpriority;
10896                         }
10897
10898                         goto unknown;
10899
10900                       case 'o':
10901                         if (name[6] == 't' &&
10902                             name[7] == 'o' &&
10903                             name[8] == 'e' &&
10904                             name[9] == 'n' &&
10905                             name[10] == 't')
10906                         {                         /* getprotoent */
10907                           return -KEY_getprotoent;
10908                         }
10909
10910                         goto unknown;
10911
10912                       default:
10913                         goto unknown;
10914                     }
10915
10916                   default:
10917                     goto unknown;
10918                 }
10919
10920               case 's':
10921                 if (name[4] == 'o' &&
10922                     name[5] == 'c' &&
10923                     name[6] == 'k' &&
10924                     name[7] == 'n' &&
10925                     name[8] == 'a' &&
10926                     name[9] == 'm' &&
10927                     name[10] == 'e')
10928                 {                                 /* getsockname */
10929                   return -KEY_getsockname;
10930                 }
10931
10932                 goto unknown;
10933
10934               default:
10935                 goto unknown;
10936             }
10937           }
10938
10939           goto unknown;
10940
10941         case 's':
10942           if (name[1] == 'e' &&
10943               name[2] == 't' &&
10944               name[3] == 'p' &&
10945               name[4] == 'r')
10946           {
10947             switch (name[5])
10948             {
10949               case 'i':
10950                 if (name[6] == 'o' &&
10951                     name[7] == 'r' &&
10952                     name[8] == 'i' &&
10953                     name[9] == 't' &&
10954                     name[10] == 'y')
10955                 {                                 /* setpriority */
10956                   return -KEY_setpriority;
10957                 }
10958
10959                 goto unknown;
10960
10961               case 'o':
10962                 if (name[6] == 't' &&
10963                     name[7] == 'o' &&
10964                     name[8] == 'e' &&
10965                     name[9] == 'n' &&
10966                     name[10] == 't')
10967                 {                                 /* setprotoent */
10968                   return -KEY_setprotoent;
10969                 }
10970
10971                 goto unknown;
10972
10973               default:
10974                 goto unknown;
10975             }
10976           }
10977
10978           goto unknown;
10979
10980         default:
10981           goto unknown;
10982       }
10983
10984     case 12: /* 2 tokens of length 12 */
10985       if (name[0] == 'g' &&
10986           name[1] == 'e' &&
10987           name[2] == 't' &&
10988           name[3] == 'n' &&
10989           name[4] == 'e' &&
10990           name[5] == 't' &&
10991           name[6] == 'b' &&
10992           name[7] == 'y')
10993       {
10994         switch (name[8])
10995         {
10996           case 'a':
10997             if (name[9] == 'd' &&
10998                 name[10] == 'd' &&
10999                 name[11] == 'r')
11000             {                                     /* getnetbyaddr */
11001               return -KEY_getnetbyaddr;
11002             }
11003
11004             goto unknown;
11005
11006           case 'n':
11007             if (name[9] == 'a' &&
11008                 name[10] == 'm' &&
11009                 name[11] == 'e')
11010             {                                     /* getnetbyname */
11011               return -KEY_getnetbyname;
11012             }
11013
11014             goto unknown;
11015
11016           default:
11017             goto unknown;
11018         }
11019       }
11020
11021       goto unknown;
11022
11023     case 13: /* 4 tokens of length 13 */
11024       if (name[0] == 'g' &&
11025           name[1] == 'e' &&
11026           name[2] == 't')
11027       {
11028         switch (name[3])
11029         {
11030           case 'h':
11031             if (name[4] == 'o' &&
11032                 name[5] == 's' &&
11033                 name[6] == 't' &&
11034                 name[7] == 'b' &&
11035                 name[8] == 'y')
11036             {
11037               switch (name[9])
11038               {
11039                 case 'a':
11040                   if (name[10] == 'd' &&
11041                       name[11] == 'd' &&
11042                       name[12] == 'r')
11043                   {                               /* gethostbyaddr */
11044                     return -KEY_gethostbyaddr;
11045                   }
11046
11047                   goto unknown;
11048
11049                 case 'n':
11050                   if (name[10] == 'a' &&
11051                       name[11] == 'm' &&
11052                       name[12] == 'e')
11053                   {                               /* gethostbyname */
11054                     return -KEY_gethostbyname;
11055                   }
11056
11057                   goto unknown;
11058
11059                 default:
11060                   goto unknown;
11061               }
11062             }
11063
11064             goto unknown;
11065
11066           case 's':
11067             if (name[4] == 'e' &&
11068                 name[5] == 'r' &&
11069                 name[6] == 'v' &&
11070                 name[7] == 'b' &&
11071                 name[8] == 'y')
11072             {
11073               switch (name[9])
11074               {
11075                 case 'n':
11076                   if (name[10] == 'a' &&
11077                       name[11] == 'm' &&
11078                       name[12] == 'e')
11079                   {                               /* getservbyname */
11080                     return -KEY_getservbyname;
11081                   }
11082
11083                   goto unknown;
11084
11085                 case 'p':
11086                   if (name[10] == 'o' &&
11087                       name[11] == 'r' &&
11088                       name[12] == 't')
11089                   {                               /* getservbyport */
11090                     return -KEY_getservbyport;
11091                   }
11092
11093                   goto unknown;
11094
11095                 default:
11096                   goto unknown;
11097               }
11098             }
11099
11100             goto unknown;
11101
11102           default:
11103             goto unknown;
11104         }
11105       }
11106
11107       goto unknown;
11108
11109     case 14: /* 1 tokens of length 14 */
11110       if (name[0] == 'g' &&
11111           name[1] == 'e' &&
11112           name[2] == 't' &&
11113           name[3] == 'p' &&
11114           name[4] == 'r' &&
11115           name[5] == 'o' &&
11116           name[6] == 't' &&
11117           name[7] == 'o' &&
11118           name[8] == 'b' &&
11119           name[9] == 'y' &&
11120           name[10] == 'n' &&
11121           name[11] == 'a' &&
11122           name[12] == 'm' &&
11123           name[13] == 'e')
11124       {                                           /* getprotobyname */
11125         return -KEY_getprotobyname;
11126       }
11127
11128       goto unknown;
11129
11130     case 16: /* 1 tokens of length 16 */
11131       if (name[0] == 'g' &&
11132           name[1] == 'e' &&
11133           name[2] == 't' &&
11134           name[3] == 'p' &&
11135           name[4] == 'r' &&
11136           name[5] == 'o' &&
11137           name[6] == 't' &&
11138           name[7] == 'o' &&
11139           name[8] == 'b' &&
11140           name[9] == 'y' &&
11141           name[10] == 'n' &&
11142           name[11] == 'u' &&
11143           name[12] == 'm' &&
11144           name[13] == 'b' &&
11145           name[14] == 'e' &&
11146           name[15] == 'r')
11147       {                                           /* getprotobynumber */
11148         return -KEY_getprotobynumber;
11149       }
11150
11151       goto unknown;
11152
11153     default:
11154       goto unknown;
11155   }
11156
11157 unknown:
11158   return 0;
11159 }
11160
11161 STATIC void
11162 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
11163 {
11164     dVAR;
11165
11166     PERL_ARGS_ASSERT_CHECKCOMMA;
11167
11168     if (*s == ' ' && s[1] == '(') {     /* XXX gotta be a better way */
11169         if (ckWARN(WARN_SYNTAX)) {
11170             int level = 1;
11171             const char *w;
11172             for (w = s+2; *w && level; w++) {
11173                 if (*w == '(')
11174                     ++level;
11175                 else if (*w == ')')
11176                     --level;
11177             }
11178             while (isSPACE(*w))
11179                 ++w;
11180             /* the list of chars below is for end of statements or
11181              * block / parens, boolean operators (&&, ||, //) and branch
11182              * constructs (or, and, if, until, unless, while, err, for).
11183              * Not a very solid hack... */
11184             if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
11185                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
11186                             "%s (...) interpreted as function",name);
11187         }
11188     }
11189     while (s < PL_bufend && isSPACE(*s))
11190         s++;
11191     if (*s == '(')
11192         s++;
11193     while (s < PL_bufend && isSPACE(*s))
11194         s++;
11195     if (isIDFIRST_lazy_if(s,UTF)) {
11196         const char * const w = s++;
11197         while (isALNUM_lazy_if(s,UTF))
11198             s++;
11199         while (s < PL_bufend && isSPACE(*s))
11200             s++;
11201         if (*s == ',') {
11202             GV* gv;
11203             if (keyword(w, s - w, 0))
11204                 return;
11205
11206             gv = gv_fetchpvn_flags(w, s - w, 0, SVt_PVCV);
11207             if (gv && GvCVu(gv))
11208                 return;
11209             Perl_croak(aTHX_ "No comma allowed after %s", what);
11210         }
11211     }
11212 }
11213
11214 /* Either returns sv, or mortalizes sv and returns a new SV*.
11215    Best used as sv=new_constant(..., sv, ...).
11216    If s, pv are NULL, calls subroutine with one argument,
11217    and type is used with error messages only. */
11218
11219 STATIC SV *
11220 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
11221                SV *sv, SV *pv, const char *type, STRLEN typelen)
11222 {
11223     dVAR; dSP;
11224     HV * const table = GvHV(PL_hintgv);          /* ^H */
11225     SV *res;
11226     SV **cvp;
11227     SV *cv, *typesv;
11228     const char *why1 = "", *why2 = "", *why3 = "";
11229
11230     PERL_ARGS_ASSERT_NEW_CONSTANT;
11231
11232     if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
11233         SV *msg;
11234
11235         why2 = (const char *)
11236             (strEQ(key,"charnames")
11237              ? "(possibly a missing \"use charnames ...\")"
11238              : "");
11239         msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
11240                             (type ? type: "undef"), why2);
11241
11242         /* This is convoluted and evil ("goto considered harmful")
11243          * but I do not understand the intricacies of all the different
11244          * failure modes of %^H in here.  The goal here is to make
11245          * the most probable error message user-friendly. --jhi */
11246
11247         goto msgdone;
11248
11249     report:
11250         msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
11251                             (type ? type: "undef"), why1, why2, why3);
11252     msgdone:
11253         yyerror(SvPVX_const(msg));
11254         SvREFCNT_dec(msg);
11255         return sv;
11256     }
11257     cvp = hv_fetch(table, key, keylen, FALSE);
11258     if (!cvp || !SvOK(*cvp)) {
11259         why1 = "$^H{";
11260         why2 = key;
11261         why3 = "} is not defined";
11262         goto report;
11263     }
11264     sv_2mortal(sv);                     /* Parent created it permanently */
11265     cv = *cvp;
11266     if (!pv && s)
11267         pv = newSVpvn_flags(s, len, SVs_TEMP);
11268     if (type && pv)
11269         typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
11270     else
11271         typesv = &PL_sv_undef;
11272
11273     PUSHSTACKi(PERLSI_OVERLOAD);
11274     ENTER ;
11275     SAVETMPS;
11276
11277     PUSHMARK(SP) ;
11278     EXTEND(sp, 3);
11279     if (pv)
11280         PUSHs(pv);
11281     PUSHs(sv);
11282     if (pv)
11283         PUSHs(typesv);
11284     PUTBACK;
11285     call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
11286
11287     SPAGAIN ;
11288
11289     /* Check the eval first */
11290     if (!PL_in_eval && SvTRUE(ERRSV)) {
11291         sv_catpvs(ERRSV, "Propagated");
11292         yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
11293         (void)POPs;
11294         res = SvREFCNT_inc_simple(sv);
11295     }
11296     else {
11297         res = POPs;
11298         SvREFCNT_inc_simple_void(res);
11299     }
11300
11301     PUTBACK ;
11302     FREETMPS ;
11303     LEAVE ;
11304     POPSTACK;
11305
11306     if (!SvOK(res)) {
11307         why1 = "Call to &{$^H{";
11308         why2 = key;
11309         why3 = "}} did not return a defined value";
11310         sv = res;
11311         goto report;
11312     }
11313
11314     return res;
11315 }
11316
11317 /* Returns a NUL terminated string, with the length of the string written to
11318    *slp
11319    */
11320 STATIC char *
11321 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
11322 {
11323     dVAR;
11324     register char *d = dest;
11325     register char * const e = d + destlen - 3;  /* two-character token, ending NUL */
11326
11327     PERL_ARGS_ASSERT_SCAN_WORD;
11328
11329     for (;;) {
11330         if (d >= e)
11331             Perl_croak(aTHX_ ident_too_long);
11332         if (isALNUM(*s))        /* UTF handled below */
11333             *d++ = *s++;
11334         else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
11335             *d++ = ':';
11336             *d++ = ':';
11337             s++;
11338         }
11339         else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
11340             *d++ = *s++;
11341             *d++ = *s++;
11342         }
11343         else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
11344             char *t = s + UTF8SKIP(s);
11345             size_t len;
11346             while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
11347                 t += UTF8SKIP(t);
11348             len = t - s;
11349             if (d + len > e)
11350                 Perl_croak(aTHX_ ident_too_long);
11351             Copy(s, d, len, char);
11352             d += len;
11353             s = t;
11354         }
11355         else {
11356             *d = '\0';
11357             *slp = d - dest;
11358             return s;
11359         }
11360     }
11361 }
11362
11363 STATIC char *
11364 S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
11365 {
11366     dVAR;
11367     char *bracket = NULL;
11368     char funny = *s++;
11369     register char *d = dest;
11370     register char * const e = d + destlen - 3;    /* two-character token, ending NUL */
11371
11372     PERL_ARGS_ASSERT_SCAN_IDENT;
11373
11374     if (isSPACE(*s))
11375         s = PEEKSPACE(s);
11376     if (isDIGIT(*s)) {
11377         while (isDIGIT(*s)) {
11378             if (d >= e)
11379                 Perl_croak(aTHX_ ident_too_long);
11380             *d++ = *s++;
11381         }
11382     }
11383     else {
11384         for (;;) {
11385             if (d >= e)
11386                 Perl_croak(aTHX_ ident_too_long);
11387             if (isALNUM(*s))    /* UTF handled below */
11388                 *d++ = *s++;
11389             else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
11390                 *d++ = ':';
11391                 *d++ = ':';
11392                 s++;
11393             }
11394             else if (*s == ':' && s[1] == ':') {
11395                 *d++ = *s++;
11396                 *d++ = *s++;
11397             }
11398             else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
11399                 char *t = s + UTF8SKIP(s);
11400                 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
11401                     t += UTF8SKIP(t);
11402                 if (d + (t - s) > e)
11403                     Perl_croak(aTHX_ ident_too_long);
11404                 Copy(s, d, t - s, char);
11405                 d += t - s;
11406                 s = t;
11407             }
11408             else
11409                 break;
11410         }
11411     }
11412     *d = '\0';
11413     d = dest;
11414     if (*d) {
11415         if (PL_lex_state != LEX_NORMAL)
11416             PL_lex_state = LEX_INTERPENDMAYBE;
11417         return s;
11418     }
11419     if (*s == '$' && s[1] &&
11420         (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
11421     {
11422         return s;
11423     }
11424     if (*s == '{') {
11425         bracket = s;
11426         s++;
11427     }
11428     else if (ck_uni)
11429         check_uni();
11430     if (s < send)
11431         *d = *s++;
11432     d[1] = '\0';
11433     if (*d == '^' && *s && isCONTROLVAR(*s)) {
11434         *d = toCTRL(*s);
11435         s++;
11436     }
11437     if (bracket) {
11438         if (isSPACE(s[-1])) {
11439             while (s < send) {
11440                 const char ch = *s++;
11441                 if (!SPACE_OR_TAB(ch)) {
11442                     *d = ch;
11443                     break;
11444                 }
11445             }
11446         }
11447         if (isIDFIRST_lazy_if(d,UTF)) {
11448             d++;
11449             if (UTF) {
11450                 char *end = s;
11451                 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
11452                     end += UTF8SKIP(end);
11453                     while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
11454                         end += UTF8SKIP(end);
11455                 }
11456                 Copy(s, d, end - s, char);
11457                 d += end - s;
11458                 s = end;
11459             }
11460             else {
11461                 while ((isALNUM(*s) || *s == ':') && d < e)
11462                     *d++ = *s++;
11463                 if (d >= e)
11464                     Perl_croak(aTHX_ ident_too_long);
11465             }
11466             *d = '\0';
11467             while (s < send && SPACE_OR_TAB(*s))
11468                 s++;
11469             if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
11470                 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
11471                     const char * const brack =
11472                         (const char *)
11473                         ((*s == '[') ? "[...]" : "{...}");
11474                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
11475                         "Ambiguous use of %c{%s%s} resolved to %c%s%s",
11476                         funny, dest, brack, funny, dest, brack);
11477                 }
11478                 bracket++;
11479                 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
11480                 return s;
11481             }
11482         }
11483         /* Handle extended ${^Foo} variables
11484          * 1999-02-27 mjd-perl-patch@plover.com */
11485         else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
11486                  && isALNUM(*s))
11487         {
11488             d++;
11489             while (isALNUM(*s) && d < e) {
11490                 *d++ = *s++;
11491             }
11492             if (d >= e)
11493                 Perl_croak(aTHX_ ident_too_long);
11494             *d = '\0';
11495         }
11496         if (*s == '}') {
11497             s++;
11498             if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
11499                 PL_lex_state = LEX_INTERPEND;
11500                 PL_expect = XREF;
11501             }
11502             if (PL_lex_state == LEX_NORMAL) {
11503                 if (ckWARN(WARN_AMBIGUOUS) &&
11504                     (keyword(dest, d - dest, 0)
11505                      || get_cvn_flags(dest, d - dest, 0)))
11506                 {
11507                     if (funny == '#')
11508                         funny = '@';
11509                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
11510                         "Ambiguous use of %c{%s} resolved to %c%s",
11511                         funny, dest, funny, dest);
11512                 }
11513             }
11514         }
11515         else {
11516             s = bracket;                /* let the parser handle it */
11517             *dest = '\0';
11518         }
11519     }
11520     else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
11521         PL_lex_state = LEX_INTERPEND;
11522     return s;
11523 }
11524
11525 static U32
11526 S_pmflag(U32 pmfl, const char ch) {
11527     switch (ch) {
11528         CASE_STD_PMMOD_FLAGS_PARSE_SET(&pmfl);
11529     case GLOBAL_PAT_MOD:    pmfl |= PMf_GLOBAL; break;
11530     case CONTINUE_PAT_MOD:  pmfl |= PMf_CONTINUE; break;
11531     case ONCE_PAT_MOD:      pmfl |= PMf_KEEP; break;
11532     case KEEPCOPY_PAT_MOD:  pmfl |= PMf_KEEPCOPY; break;
11533     }
11534     return pmfl;
11535 }
11536
11537 void
11538 Perl_pmflag(pTHX_ U32* pmfl, int ch)
11539 {
11540     PERL_ARGS_ASSERT_PMFLAG;
11541
11542     Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED),
11543                      "Perl_pmflag() is deprecated, and will be removed from the XS API");
11544
11545     if (ch<256) {
11546         *pmfl = S_pmflag(*pmfl, (char)ch);
11547     }
11548 }
11549
11550 STATIC char *
11551 S_scan_pat(pTHX_ char *start, I32 type)
11552 {
11553     dVAR;
11554     PMOP *pm;
11555     char *s = scan_str(start,!!PL_madskills,FALSE);
11556     const char * const valid_flags =
11557         (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
11558 #ifdef PERL_MAD
11559     char *modstart;
11560 #endif
11561
11562     PERL_ARGS_ASSERT_SCAN_PAT;
11563
11564     if (!s) {
11565         const char * const delimiter = skipspace(start);
11566         Perl_croak(aTHX_
11567                    (const char *)
11568                    (*delimiter == '?'
11569                     ? "Search pattern not terminated or ternary operator parsed as search pattern"
11570                     : "Search pattern not terminated" ));
11571     }
11572
11573     pm = (PMOP*)newPMOP(type, 0);
11574     if (PL_multi_open == '?') {
11575         /* This is the only point in the code that sets PMf_ONCE:  */
11576         pm->op_pmflags |= PMf_ONCE;
11577
11578         /* Hence it's safe to do this bit of PMOP book-keeping here, which
11579            allows us to restrict the list needed by reset to just the ??
11580            matches.  */
11581         assert(type != OP_TRANS);
11582         if (PL_curstash) {
11583             MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
11584             U32 elements;
11585             if (!mg) {
11586                 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
11587                                  0);
11588             }
11589             elements = mg->mg_len / sizeof(PMOP**);
11590             Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
11591             ((PMOP**)mg->mg_ptr) [elements++] = pm;
11592             mg->mg_len = elements * sizeof(PMOP**);
11593             PmopSTASH_set(pm,PL_curstash);
11594         }
11595     }
11596 #ifdef PERL_MAD
11597     modstart = s;
11598 #endif
11599     while (*s && strchr(valid_flags, *s))
11600         pm->op_pmflags = S_pmflag(pm->op_pmflags, *s++);
11601 #ifdef PERL_MAD
11602     if (PL_madskills && modstart != s) {
11603         SV* tmptoken = newSVpvn(modstart, s - modstart);
11604         append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
11605     }
11606 #endif
11607     /* issue a warning if /c is specified,but /g is not */
11608     if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
11609     {
11610         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
11611                        "Use of /c modifier is meaningless without /g" );
11612     }
11613
11614     PL_lex_op = (OP*)pm;
11615     pl_yylval.ival = OP_MATCH;
11616     return s;
11617 }
11618
11619 STATIC char *
11620 S_scan_subst(pTHX_ char *start)
11621 {
11622     dVAR;
11623     register char *s;
11624     register PMOP *pm;
11625     I32 first_start;
11626     I32 es = 0;
11627 #ifdef PERL_MAD
11628     char *modstart;
11629 #endif
11630
11631     PERL_ARGS_ASSERT_SCAN_SUBST;
11632
11633     pl_yylval.ival = OP_NULL;
11634
11635     s = scan_str(start,!!PL_madskills,FALSE);
11636
11637     if (!s)
11638         Perl_croak(aTHX_ "Substitution pattern not terminated");
11639
11640     if (s[-1] == PL_multi_open)
11641         s--;
11642 #ifdef PERL_MAD
11643     if (PL_madskills) {
11644         CURMAD('q', PL_thisopen);
11645         CURMAD('_', PL_thiswhite);
11646         CURMAD('E', PL_thisstuff);
11647         CURMAD('Q', PL_thisclose);
11648         PL_realtokenstart = s - SvPVX(PL_linestr);
11649     }
11650 #endif
11651
11652     first_start = PL_multi_start;
11653     s = scan_str(s,!!PL_madskills,FALSE);
11654     if (!s) {
11655         if (PL_lex_stuff) {
11656             SvREFCNT_dec(PL_lex_stuff);
11657             PL_lex_stuff = NULL;
11658         }
11659         Perl_croak(aTHX_ "Substitution replacement not terminated");
11660     }
11661     PL_multi_start = first_start;       /* so whole substitution is taken together */
11662
11663     pm = (PMOP*)newPMOP(OP_SUBST, 0);
11664
11665 #ifdef PERL_MAD
11666     if (PL_madskills) {
11667         CURMAD('z', PL_thisopen);
11668         CURMAD('R', PL_thisstuff);
11669         CURMAD('Z', PL_thisclose);
11670     }
11671     modstart = s;
11672 #endif
11673
11674     while (*s) {
11675         if (*s == EXEC_PAT_MOD) {
11676             s++;
11677             es++;
11678         }
11679         else if (strchr(S_PAT_MODS, *s))
11680             pm->op_pmflags = S_pmflag(pm->op_pmflags, *s++);
11681         else
11682             break;
11683     }
11684
11685 #ifdef PERL_MAD
11686     if (PL_madskills) {
11687         if (modstart != s)
11688             curmad('m', newSVpvn(modstart, s - modstart));
11689         append_madprops(PL_thismad, (OP*)pm, 0);
11690         PL_thismad = 0;
11691     }
11692 #endif
11693     if ((pm->op_pmflags & PMf_CONTINUE)) {
11694         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
11695     }
11696
11697     if (es) {
11698         SV * const repl = newSVpvs("");
11699
11700         PL_sublex_info.super_bufptr = s;
11701         PL_sublex_info.super_bufend = PL_bufend;
11702         PL_multi_end = 0;
11703         pm->op_pmflags |= PMf_EVAL;
11704         while (es-- > 0) {
11705             if (es)
11706                 sv_catpvs(repl, "eval ");
11707             else
11708                 sv_catpvs(repl, "do ");
11709         }
11710         sv_catpvs(repl, "{");
11711         sv_catsv(repl, PL_lex_repl);
11712         if (strchr(SvPVX(PL_lex_repl), '#'))
11713             sv_catpvs(repl, "\n");
11714         sv_catpvs(repl, "}");
11715         SvEVALED_on(repl);
11716         SvREFCNT_dec(PL_lex_repl);
11717         PL_lex_repl = repl;
11718     }
11719
11720     PL_lex_op = (OP*)pm;
11721     pl_yylval.ival = OP_SUBST;
11722     return s;
11723 }
11724
11725 STATIC char *
11726 S_scan_trans(pTHX_ char *start)
11727 {
11728     dVAR;
11729     register char* s;
11730     OP *o;
11731     short *tbl;
11732     U8 squash;
11733     U8 del;
11734     U8 complement;
11735 #ifdef PERL_MAD
11736     char *modstart;
11737 #endif
11738
11739     PERL_ARGS_ASSERT_SCAN_TRANS;
11740
11741     pl_yylval.ival = OP_NULL;
11742
11743     s = scan_str(start,!!PL_madskills,FALSE);
11744     if (!s)
11745         Perl_croak(aTHX_ "Transliteration pattern not terminated");
11746
11747     if (s[-1] == PL_multi_open)
11748         s--;
11749 #ifdef PERL_MAD
11750     if (PL_madskills) {
11751         CURMAD('q', PL_thisopen);
11752         CURMAD('_', PL_thiswhite);
11753         CURMAD('E', PL_thisstuff);
11754         CURMAD('Q', PL_thisclose);
11755         PL_realtokenstart = s - SvPVX(PL_linestr);
11756     }
11757 #endif
11758
11759     s = scan_str(s,!!PL_madskills,FALSE);
11760     if (!s) {
11761         if (PL_lex_stuff) {
11762             SvREFCNT_dec(PL_lex_stuff);
11763             PL_lex_stuff = NULL;
11764         }
11765         Perl_croak(aTHX_ "Transliteration replacement not terminated");
11766     }
11767     if (PL_madskills) {
11768         CURMAD('z', PL_thisopen);
11769         CURMAD('R', PL_thisstuff);
11770         CURMAD('Z', PL_thisclose);
11771     }
11772
11773     complement = del = squash = 0;
11774 #ifdef PERL_MAD
11775     modstart = s;
11776 #endif
11777     while (1) {
11778         switch (*s) {
11779         case 'c':
11780             complement = OPpTRANS_COMPLEMENT;
11781             break;
11782         case 'd':
11783             del = OPpTRANS_DELETE;
11784             break;
11785         case 's':
11786             squash = OPpTRANS_SQUASH;
11787             break;
11788         default:
11789             goto no_more;
11790         }
11791         s++;
11792     }
11793   no_more:
11794
11795     tbl = (short *)PerlMemShared_calloc(complement&&!del?258:256, sizeof(short));
11796     o = newPVOP(OP_TRANS, 0, (char*)tbl);
11797     o->op_private &= ~OPpTRANS_ALL;
11798     o->op_private |= del|squash|complement|
11799       (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
11800       (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF   : 0);
11801
11802     PL_lex_op = o;
11803     pl_yylval.ival = OP_TRANS;
11804
11805 #ifdef PERL_MAD
11806     if (PL_madskills) {
11807         if (modstart != s)
11808             curmad('m', newSVpvn(modstart, s - modstart));
11809         append_madprops(PL_thismad, o, 0);
11810         PL_thismad = 0;
11811     }
11812 #endif
11813
11814     return s;
11815 }
11816
11817 STATIC char *
11818 S_scan_heredoc(pTHX_ register char *s)
11819 {
11820     dVAR;
11821     SV *herewas;
11822     I32 op_type = OP_SCALAR;
11823     I32 len;
11824     SV *tmpstr;
11825     char term;
11826     const char *found_newline;
11827     register char *d;
11828     register char *e;
11829     char *peek;
11830     const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
11831 #ifdef PERL_MAD
11832     I32 stuffstart = s - SvPVX(PL_linestr);
11833     char *tstart;
11834
11835     PL_realtokenstart = -1;
11836 #endif
11837
11838     PERL_ARGS_ASSERT_SCAN_HEREDOC;
11839
11840     s += 2;
11841     d = PL_tokenbuf;
11842     e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
11843     if (!outer)
11844         *d++ = '\n';
11845     peek = s;
11846     while (SPACE_OR_TAB(*peek))
11847         peek++;
11848     if (*peek == '`' || *peek == '\'' || *peek =='"') {
11849         s = peek;
11850         term = *s++;
11851         s = delimcpy(d, e, s, PL_bufend, term, &len);
11852         d += len;
11853         if (s < PL_bufend)
11854             s++;
11855     }
11856     else {
11857         if (*s == '\\')
11858             s++, term = '\'';
11859         else
11860             term = '"';
11861         if (!isALNUM_lazy_if(s,UTF))
11862             deprecate("bare << to mean <<\"\"");
11863         for (; isALNUM_lazy_if(s,UTF); s++) {
11864             if (d < e)
11865                 *d++ = *s;
11866         }
11867     }
11868     if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
11869         Perl_croak(aTHX_ "Delimiter for here document is too long");
11870     *d++ = '\n';
11871     *d = '\0';
11872     len = d - PL_tokenbuf;
11873
11874 #ifdef PERL_MAD
11875     if (PL_madskills) {
11876         tstart = PL_tokenbuf + !outer;
11877         PL_thisclose = newSVpvn(tstart, len - !outer);
11878         tstart = SvPVX(PL_linestr) + stuffstart;
11879         PL_thisopen = newSVpvn(tstart, s - tstart);
11880         stuffstart = s - SvPVX(PL_linestr);
11881     }
11882 #endif
11883 #ifndef PERL_STRICT_CR
11884     d = strchr(s, '\r');
11885     if (d) {
11886         char * const olds = s;
11887         s = d;
11888         while (s < PL_bufend) {
11889             if (*s == '\r') {
11890                 *d++ = '\n';
11891                 if (*++s == '\n')
11892                     s++;
11893             }
11894             else if (*s == '\n' && s[1] == '\r') {      /* \015\013 on a mac? */
11895                 *d++ = *s++;
11896                 s++;
11897             }
11898             else
11899                 *d++ = *s++;
11900         }
11901         *d = '\0';
11902         PL_bufend = d;
11903         SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
11904         s = olds;
11905     }
11906 #endif
11907 #ifdef PERL_MAD
11908     found_newline = 0;
11909 #endif
11910     if ( outer || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s)) ) {
11911         herewas = newSVpvn(s,PL_bufend-s);
11912     }
11913     else {
11914 #ifdef PERL_MAD
11915         herewas = newSVpvn(s-1,found_newline-s+1);
11916 #else
11917         s--;
11918         herewas = newSVpvn(s,found_newline-s);
11919 #endif
11920     }
11921 #ifdef PERL_MAD
11922     if (PL_madskills) {
11923         tstart = SvPVX(PL_linestr) + stuffstart;
11924         if (PL_thisstuff)
11925             sv_catpvn(PL_thisstuff, tstart, s - tstart);
11926         else
11927             PL_thisstuff = newSVpvn(tstart, s - tstart);
11928     }
11929 #endif
11930     s += SvCUR(herewas);
11931
11932 #ifdef PERL_MAD
11933     stuffstart = s - SvPVX(PL_linestr);
11934
11935     if (found_newline)
11936         s--;
11937 #endif
11938
11939     tmpstr = newSV_type(SVt_PVIV);
11940     SvGROW(tmpstr, 80);
11941     if (term == '\'') {
11942         op_type = OP_CONST;
11943         SvIV_set(tmpstr, -1);
11944     }
11945     else if (term == '`') {
11946         op_type = OP_BACKTICK;
11947         SvIV_set(tmpstr, '\\');
11948     }
11949
11950     CLINE;
11951     PL_multi_start = CopLINE(PL_curcop);
11952     PL_multi_open = PL_multi_close = '<';
11953     term = *PL_tokenbuf;
11954     if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
11955         char * const bufptr = PL_sublex_info.super_bufptr;
11956         char * const bufend = PL_sublex_info.super_bufend;
11957         char * const olds = s - SvCUR(herewas);
11958         s = strchr(bufptr, '\n');
11959         if (!s)
11960             s = bufend;
11961         d = s;
11962         while (s < bufend &&
11963           (*s != term || memNE(s,PL_tokenbuf,len)) ) {
11964             if (*s++ == '\n')
11965                 CopLINE_inc(PL_curcop);
11966         }
11967         if (s >= bufend) {
11968             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11969             missingterm(PL_tokenbuf);
11970         }
11971         sv_setpvn(herewas,bufptr,d-bufptr+1);
11972         sv_setpvn(tmpstr,d+1,s-d);
11973         s += len - 1;
11974         sv_catpvn(herewas,s,bufend-s);
11975         Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
11976
11977         s = olds;
11978         goto retval;
11979     }
11980     else if (!outer) {
11981         d = s;
11982         while (s < PL_bufend &&
11983           (*s != term || memNE(s,PL_tokenbuf,len)) ) {
11984             if (*s++ == '\n')
11985                 CopLINE_inc(PL_curcop);
11986         }
11987         if (s >= PL_bufend) {
11988             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11989             missingterm(PL_tokenbuf);
11990         }
11991         sv_setpvn(tmpstr,d+1,s-d);
11992 #ifdef PERL_MAD
11993         if (PL_madskills) {
11994             if (PL_thisstuff)
11995                 sv_catpvn(PL_thisstuff, d + 1, s - d);
11996             else
11997                 PL_thisstuff = newSVpvn(d + 1, s - d);
11998             stuffstart = s - SvPVX(PL_linestr);
11999         }
12000 #endif
12001         s += len - 1;
12002         CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
12003
12004         sv_catpvn(herewas,s,PL_bufend-s);
12005         sv_setsv(PL_linestr,herewas);
12006         PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
12007         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
12008         PL_last_lop = PL_last_uni = NULL;
12009     }
12010     else
12011         sv_setpvs(tmpstr,"");   /* avoid "uninitialized" warning */
12012     while (s >= PL_bufend) {    /* multiple line string? */
12013 #ifdef PERL_MAD
12014         if (PL_madskills) {
12015             tstart = SvPVX(PL_linestr) + stuffstart;
12016             if (PL_thisstuff)
12017                 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
12018             else
12019                 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
12020         }
12021 #endif
12022         PL_bufptr = s;
12023         CopLINE_inc(PL_curcop);
12024         if (!outer || !lex_next_chunk(0)) {
12025             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
12026             missingterm(PL_tokenbuf);
12027         }
12028         CopLINE_dec(PL_curcop);
12029         s = PL_bufptr;
12030 #ifdef PERL_MAD
12031         stuffstart = s - SvPVX(PL_linestr);
12032 #endif
12033         CopLINE_inc(PL_curcop);
12034         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
12035         PL_last_lop = PL_last_uni = NULL;
12036 #ifndef PERL_STRICT_CR
12037         if (PL_bufend - PL_linestart >= 2) {
12038             if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
12039                 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
12040             {
12041                 PL_bufend[-2] = '\n';
12042                 PL_bufend--;
12043                 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
12044             }
12045             else if (PL_bufend[-1] == '\r')
12046                 PL_bufend[-1] = '\n';
12047         }
12048         else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
12049             PL_bufend[-1] = '\n';
12050 #endif
12051         if (*s == term && memEQ(s,PL_tokenbuf,len)) {
12052             STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
12053             *(SvPVX(PL_linestr) + off ) = ' ';
12054             sv_catsv(PL_linestr,herewas);
12055             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
12056             s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
12057         }
12058         else {
12059             s = PL_bufend;
12060             sv_catsv(tmpstr,PL_linestr);
12061         }
12062     }
12063     s++;
12064 retval:
12065     PL_multi_end = CopLINE(PL_curcop);
12066     if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
12067         SvPV_shrink_to_cur(tmpstr);
12068     }
12069     SvREFCNT_dec(herewas);
12070     if (!IN_BYTES) {
12071         if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
12072             SvUTF8_on(tmpstr);
12073         else if (PL_encoding)
12074             sv_recode_to_utf8(tmpstr, PL_encoding);
12075     }
12076     PL_lex_stuff = tmpstr;
12077     pl_yylval.ival = op_type;
12078     return s;
12079 }
12080
12081 /* scan_inputsymbol
12082    takes: current position in input buffer
12083    returns: new position in input buffer
12084    side-effects: pl_yylval and lex_op are set.
12085
12086    This code handles:
12087
12088    <>           read from ARGV
12089    <FH>         read from filehandle
12090    <pkg::FH>    read from package qualified filehandle
12091    <pkg'FH>     read from package qualified filehandle
12092    <$fh>        read from filehandle in $fh
12093    <*.h>        filename glob
12094
12095 */
12096
12097 STATIC char *
12098 S_scan_inputsymbol(pTHX_ char *start)
12099 {
12100     dVAR;
12101     register char *s = start;           /* current position in buffer */
12102     char *end;
12103     I32 len;
12104     char *d = PL_tokenbuf;                                      /* start of temp holding space */
12105     const char * const e = PL_tokenbuf + sizeof PL_tokenbuf;    /* end of temp holding space */
12106
12107     PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
12108
12109     end = strchr(s, '\n');
12110     if (!end)
12111         end = PL_bufend;
12112     s = delimcpy(d, e, s + 1, end, '>', &len);  /* extract until > */
12113
12114     /* die if we didn't have space for the contents of the <>,
12115        or if it didn't end, or if we see a newline
12116     */
12117
12118     if (len >= (I32)sizeof PL_tokenbuf)
12119         Perl_croak(aTHX_ "Excessively long <> operator");
12120     if (s >= end)
12121         Perl_croak(aTHX_ "Unterminated <> operator");
12122
12123     s++;
12124
12125     /* check for <$fh>
12126        Remember, only scalar variables are interpreted as filehandles by
12127        this code.  Anything more complex (e.g., <$fh{$num}>) will be
12128        treated as a glob() call.
12129        This code makes use of the fact that except for the $ at the front,
12130        a scalar variable and a filehandle look the same.
12131     */
12132     if (*d == '$' && d[1]) d++;
12133
12134     /* allow <Pkg'VALUE> or <Pkg::VALUE> */
12135     while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
12136         d++;
12137
12138     /* If we've tried to read what we allow filehandles to look like, and
12139        there's still text left, then it must be a glob() and not a getline.
12140        Use scan_str to pull out the stuff between the <> and treat it
12141        as nothing more than a string.
12142     */
12143
12144     if (d - PL_tokenbuf != len) {
12145         pl_yylval.ival = OP_GLOB;
12146         s = scan_str(start,!!PL_madskills,FALSE);
12147         if (!s)
12148            Perl_croak(aTHX_ "Glob not terminated");
12149         return s;
12150     }
12151     else {
12152         bool readline_overriden = FALSE;
12153         GV *gv_readline;
12154         GV **gvp;
12155         /* we're in a filehandle read situation */
12156         d = PL_tokenbuf;
12157
12158         /* turn <> into <ARGV> */
12159         if (!len)
12160             Copy("ARGV",d,5,char);
12161
12162         /* Check whether readline() is overriden */
12163         gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
12164         if ((gv_readline
12165                 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
12166                 ||
12167                 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
12168                  && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
12169                 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
12170             readline_overriden = TRUE;
12171
12172         /* if <$fh>, create the ops to turn the variable into a
12173            filehandle
12174         */
12175         if (*d == '$') {
12176             /* try to find it in the pad for this block, otherwise find
12177                add symbol table ops
12178             */
12179             const PADOFFSET tmp = pad_findmy(d, len, 0);
12180             if (tmp != NOT_IN_PAD) {
12181                 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
12182                     HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
12183                     HEK * const stashname = HvNAME_HEK(stash);
12184                     SV * const sym = sv_2mortal(newSVhek(stashname));
12185                     sv_catpvs(sym, "::");
12186                     sv_catpv(sym, d+1);
12187                     d = SvPVX(sym);
12188                     goto intro_sym;
12189                 }
12190                 else {
12191                     OP * const o = newOP(OP_PADSV, 0);
12192                     o->op_targ = tmp;
12193                     PL_lex_op = readline_overriden
12194                         ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12195                                 append_elem(OP_LIST, o,
12196                                     newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
12197                         : (OP*)newUNOP(OP_READLINE, 0, o);
12198                 }
12199             }
12200             else {
12201                 GV *gv;
12202                 ++d;
12203 intro_sym:
12204                 gv = gv_fetchpv(d,
12205                                 (PL_in_eval
12206                                  ? (GV_ADDMULTI | GV_ADDINEVAL)
12207                                  : GV_ADDMULTI),
12208                                 SVt_PV);
12209                 PL_lex_op = readline_overriden
12210                     ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12211                             append_elem(OP_LIST,
12212                                 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
12213                                 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
12214                     : (OP*)newUNOP(OP_READLINE, 0,
12215                             newUNOP(OP_RV2SV, 0,
12216                                 newGVOP(OP_GV, 0, gv)));
12217             }
12218             if (!readline_overriden)
12219                 PL_lex_op->op_flags |= OPf_SPECIAL;
12220             /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
12221             pl_yylval.ival = OP_NULL;
12222         }
12223
12224         /* If it's none of the above, it must be a literal filehandle
12225            (<Foo::BAR> or <FOO>) so build a simple readline OP */
12226         else {
12227             GV * const gv = gv_fetchpv(d, GV_ADD, SVt_PVIO);
12228             PL_lex_op = readline_overriden
12229                 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12230                         append_elem(OP_LIST,
12231                             newGVOP(OP_GV, 0, gv),
12232                             newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
12233                 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
12234             pl_yylval.ival = OP_NULL;
12235         }
12236     }
12237
12238     return s;
12239 }
12240
12241
12242 /* scan_str
12243    takes: start position in buffer
12244           keep_quoted preserve \ on the embedded delimiter(s)
12245           keep_delims preserve the delimiters around the string
12246    returns: position to continue reading from buffer
12247    side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
12248         updates the read buffer.
12249
12250    This subroutine pulls a string out of the input.  It is called for:
12251         q               single quotes           q(literal text)
12252         '               single quotes           'literal text'
12253         qq              double quotes           qq(interpolate $here please)
12254         "               double quotes           "interpolate $here please"
12255         qx              backticks               qx(/bin/ls -l)
12256         `               backticks               `/bin/ls -l`
12257         qw              quote words             @EXPORT_OK = qw( func() $spam )
12258         m//             regexp match            m/this/
12259         s///            regexp substitute       s/this/that/
12260         tr///           string transliterate    tr/this/that/
12261         y///            string transliterate    y/this/that/
12262         ($*@)           sub prototypes          sub foo ($)
12263         (stuff)         sub attr parameters     sub foo : attr(stuff)
12264         <>              readline or globs       <FOO>, <>, <$fh>, or <*.c>
12265
12266    In most of these cases (all but <>, patterns and transliterate)
12267    yylex() calls scan_str().  m// makes yylex() call scan_pat() which
12268    calls scan_str().  s/// makes yylex() call scan_subst() which calls
12269    scan_str().  tr/// and y/// make yylex() call scan_trans() which
12270    calls scan_str().
12271
12272    It skips whitespace before the string starts, and treats the first
12273    character as the delimiter.  If the delimiter is one of ([{< then
12274    the corresponding "close" character )]}> is used as the closing
12275    delimiter.  It allows quoting of delimiters, and if the string has
12276    balanced delimiters ([{<>}]) it allows nesting.
12277
12278    On success, the SV with the resulting string is put into lex_stuff or,
12279    if that is already non-NULL, into lex_repl. The second case occurs only
12280    when parsing the RHS of the special constructs s/// and tr/// (y///).
12281    For convenience, the terminating delimiter character is stuffed into
12282    SvIVX of the SV.
12283 */
12284
12285 STATIC char *
12286 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
12287 {
12288     dVAR;
12289     SV *sv;                             /* scalar value: string */
12290     const char *tmps;                   /* temp string, used for delimiter matching */
12291     register char *s = start;           /* current position in the buffer */
12292     register char term;                 /* terminating character */
12293     register char *to;                  /* current position in the sv's data */
12294     I32 brackets = 1;                   /* bracket nesting level */
12295     bool has_utf8 = FALSE;              /* is there any utf8 content? */
12296     I32 termcode;                       /* terminating char. code */
12297     U8 termstr[UTF8_MAXBYTES];          /* terminating string */
12298     STRLEN termlen;                     /* length of terminating string */
12299     int last_off = 0;                   /* last position for nesting bracket */
12300 #ifdef PERL_MAD
12301     int stuffstart;
12302     char *tstart;
12303 #endif
12304
12305     PERL_ARGS_ASSERT_SCAN_STR;
12306
12307     /* skip space before the delimiter */
12308     if (isSPACE(*s)) {
12309         s = PEEKSPACE(s);
12310     }
12311
12312 #ifdef PERL_MAD
12313     if (PL_realtokenstart >= 0) {
12314         stuffstart = PL_realtokenstart;
12315         PL_realtokenstart = -1;
12316     }
12317     else
12318         stuffstart = start - SvPVX(PL_linestr);
12319 #endif
12320     /* mark where we are, in case we need to report errors */
12321     CLINE;
12322
12323     /* after skipping whitespace, the next character is the terminator */
12324     term = *s;
12325     if (!UTF) {
12326         termcode = termstr[0] = term;
12327         termlen = 1;
12328     }
12329     else {
12330         termcode = utf8_to_uvchr((U8*)s, &termlen);
12331         Copy(s, termstr, termlen, U8);
12332         if (!UTF8_IS_INVARIANT(term))
12333             has_utf8 = TRUE;
12334     }
12335
12336     /* mark where we are */
12337     PL_multi_start = CopLINE(PL_curcop);
12338     PL_multi_open = term;
12339
12340     /* find corresponding closing delimiter */
12341     if (term && (tmps = strchr("([{< )]}> )]}>",term)))
12342         termcode = termstr[0] = term = tmps[5];
12343
12344     PL_multi_close = term;
12345
12346     /* create a new SV to hold the contents.  79 is the SV's initial length.
12347        What a random number. */
12348     sv = newSV_type(SVt_PVIV);
12349     SvGROW(sv, 80);
12350     SvIV_set(sv, termcode);
12351     (void)SvPOK_only(sv);               /* validate pointer */
12352
12353     /* move past delimiter and try to read a complete string */
12354     if (keep_delims)
12355         sv_catpvn(sv, s, termlen);
12356     s += termlen;
12357 #ifdef PERL_MAD
12358     tstart = SvPVX(PL_linestr) + stuffstart;
12359     if (!PL_thisopen && !keep_delims) {
12360         PL_thisopen = newSVpvn(tstart, s - tstart);
12361         stuffstart = s - SvPVX(PL_linestr);
12362     }
12363 #endif
12364     for (;;) {
12365         if (PL_encoding && !UTF) {
12366             bool cont = TRUE;
12367
12368             while (cont) {
12369                 int offset = s - SvPVX_const(PL_linestr);
12370                 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
12371                                            &offset, (char*)termstr, termlen);
12372                 const char * const ns = SvPVX_const(PL_linestr) + offset;
12373                 char * const svlast = SvEND(sv) - 1;
12374
12375                 for (; s < ns; s++) {
12376                     if (*s == '\n' && !PL_rsfp)
12377                         CopLINE_inc(PL_curcop);
12378                 }
12379                 if (!found)
12380                     goto read_more_line;
12381                 else {
12382                     /* handle quoted delimiters */
12383                     if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
12384                         const char *t;
12385                         for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
12386                             t--;
12387                         if ((svlast-1 - t) % 2) {
12388                             if (!keep_quoted) {
12389                                 *(svlast-1) = term;
12390                                 *svlast = '\0';
12391                                 SvCUR_set(sv, SvCUR(sv) - 1);
12392                             }
12393                             continue;
12394                         }
12395                     }
12396                     if (PL_multi_open == PL_multi_close) {
12397                         cont = FALSE;
12398                     }
12399                     else {
12400                         const char *t;
12401                         char *w;
12402                         for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
12403                             /* At here, all closes are "was quoted" one,
12404                                so we don't check PL_multi_close. */
12405                             if (*t == '\\') {
12406                                 if (!keep_quoted && *(t+1) == PL_multi_open)
12407                                     t++;
12408                                 else
12409                                     *w++ = *t++;
12410                             }
12411                             else if (*t == PL_multi_open)
12412                                 brackets++;
12413
12414                             *w = *t;
12415                         }
12416                         if (w < t) {
12417                             *w++ = term;
12418                             *w = '\0';
12419                             SvCUR_set(sv, w - SvPVX_const(sv));
12420                         }
12421                         last_off = w - SvPVX(sv);
12422                         if (--brackets <= 0)
12423                             cont = FALSE;
12424                     }
12425                 }
12426             }
12427             if (!keep_delims) {
12428                 SvCUR_set(sv, SvCUR(sv) - 1);
12429                 *SvEND(sv) = '\0';
12430             }
12431             break;
12432         }
12433
12434         /* extend sv if need be */
12435         SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
12436         /* set 'to' to the next character in the sv's string */
12437         to = SvPVX(sv)+SvCUR(sv);
12438
12439         /* if open delimiter is the close delimiter read unbridle */
12440         if (PL_multi_open == PL_multi_close) {
12441             for (; s < PL_bufend; s++,to++) {
12442                 /* embedded newlines increment the current line number */
12443                 if (*s == '\n' && !PL_rsfp)
12444                     CopLINE_inc(PL_curcop);
12445                 /* handle quoted delimiters */
12446                 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
12447                     if (!keep_quoted && s[1] == term)
12448                         s++;
12449                 /* any other quotes are simply copied straight through */
12450                     else
12451                         *to++ = *s++;
12452                 }
12453                 /* terminate when run out of buffer (the for() condition), or
12454                    have found the terminator */
12455                 else if (*s == term) {
12456                     if (termlen == 1)
12457                         break;
12458                     if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
12459                         break;
12460                 }
12461                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
12462                     has_utf8 = TRUE;
12463                 *to = *s;
12464             }
12465         }
12466
12467         /* if the terminator isn't the same as the start character (e.g.,
12468            matched brackets), we have to allow more in the quoting, and
12469            be prepared for nested brackets.
12470         */
12471         else {
12472             /* read until we run out of string, or we find the terminator */
12473             for (; s < PL_bufend; s++,to++) {
12474                 /* embedded newlines increment the line count */
12475                 if (*s == '\n' && !PL_rsfp)
12476                     CopLINE_inc(PL_curcop);
12477                 /* backslashes can escape the open or closing characters */
12478                 if (*s == '\\' && s+1 < PL_bufend) {
12479                     if (!keep_quoted &&
12480                         ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
12481                         s++;
12482                     else
12483                         *to++ = *s++;
12484                 }
12485                 /* allow nested opens and closes */
12486                 else if (*s == PL_multi_close && --brackets <= 0)
12487                     break;
12488                 else if (*s == PL_multi_open)
12489                     brackets++;
12490                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
12491                     has_utf8 = TRUE;
12492                 *to = *s;
12493             }
12494         }
12495         /* terminate the copied string and update the sv's end-of-string */
12496         *to = '\0';
12497         SvCUR_set(sv, to - SvPVX_const(sv));
12498
12499         /*
12500          * this next chunk reads more into the buffer if we're not done yet
12501          */
12502
12503         if (s < PL_bufend)
12504             break;              /* handle case where we are done yet :-) */
12505
12506 #ifndef PERL_STRICT_CR
12507         if (to - SvPVX_const(sv) >= 2) {
12508             if ((to[-2] == '\r' && to[-1] == '\n') ||
12509                 (to[-2] == '\n' && to[-1] == '\r'))
12510             {
12511                 to[-2] = '\n';
12512                 to--;
12513                 SvCUR_set(sv, to - SvPVX_const(sv));
12514             }
12515             else if (to[-1] == '\r')
12516                 to[-1] = '\n';
12517         }
12518         else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
12519             to[-1] = '\n';
12520 #endif
12521
12522      read_more_line:
12523         /* if we're out of file, or a read fails, bail and reset the current
12524            line marker so we can report where the unterminated string began
12525         */
12526 #ifdef PERL_MAD
12527         if (PL_madskills) {
12528             char * const tstart = SvPVX(PL_linestr) + stuffstart;
12529             if (PL_thisstuff)
12530                 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
12531             else
12532                 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
12533         }
12534 #endif
12535         CopLINE_inc(PL_curcop);
12536         PL_bufptr = PL_bufend;
12537         if (!lex_next_chunk(0)) {
12538             sv_free(sv);
12539             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
12540             return NULL;
12541         }
12542         s = PL_bufptr;
12543 #ifdef PERL_MAD
12544         stuffstart = 0;
12545 #endif
12546     }
12547
12548     /* at this point, we have successfully read the delimited string */
12549
12550     if (!PL_encoding || UTF) {
12551 #ifdef PERL_MAD
12552         if (PL_madskills) {
12553             char * const tstart = SvPVX(PL_linestr) + stuffstart;
12554             const int len = s - tstart;
12555             if (PL_thisstuff)
12556                 sv_catpvn(PL_thisstuff, tstart, len);
12557             else
12558                 PL_thisstuff = newSVpvn(tstart, len);
12559             if (!PL_thisclose && !keep_delims)
12560                 PL_thisclose = newSVpvn(s,termlen);
12561         }
12562 #endif
12563
12564         if (keep_delims)
12565             sv_catpvn(sv, s, termlen);
12566         s += termlen;
12567     }
12568 #ifdef PERL_MAD
12569     else {
12570         if (PL_madskills) {
12571             char * const tstart = SvPVX(PL_linestr) + stuffstart;
12572             const int len = s - tstart - termlen;
12573             if (PL_thisstuff)
12574                 sv_catpvn(PL_thisstuff, tstart, len);
12575             else
12576                 PL_thisstuff = newSVpvn(tstart, len);
12577             if (!PL_thisclose && !keep_delims)
12578                 PL_thisclose = newSVpvn(s - termlen,termlen);
12579         }
12580     }
12581 #endif
12582     if (has_utf8 || PL_encoding)
12583         SvUTF8_on(sv);
12584
12585     PL_multi_end = CopLINE(PL_curcop);
12586
12587     /* if we allocated too much space, give some back */
12588     if (SvCUR(sv) + 5 < SvLEN(sv)) {
12589         SvLEN_set(sv, SvCUR(sv) + 1);
12590         SvPV_renew(sv, SvLEN(sv));
12591     }
12592
12593     /* decide whether this is the first or second quoted string we've read
12594        for this op
12595     */
12596
12597     if (PL_lex_stuff)
12598         PL_lex_repl = sv;
12599     else
12600         PL_lex_stuff = sv;
12601     return s;
12602 }
12603
12604 /*
12605   scan_num
12606   takes: pointer to position in buffer
12607   returns: pointer to new position in buffer
12608   side-effects: builds ops for the constant in pl_yylval.op
12609
12610   Read a number in any of the formats that Perl accepts:
12611
12612   \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*)      12 12.34 12.
12613   \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*)                     .34
12614   0b[01](_?[01])*
12615   0[0-7](_?[0-7])*
12616   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
12617
12618   Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
12619   thing it reads.
12620
12621   If it reads a number without a decimal point or an exponent, it will
12622   try converting the number to an integer and see if it can do so
12623   without loss of precision.
12624 */
12625
12626 char *
12627 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
12628 {
12629     dVAR;
12630     register const char *s = start;     /* current position in buffer */
12631     register char *d;                   /* destination in temp buffer */
12632     register char *e;                   /* end of temp buffer */
12633     NV nv;                              /* number read, as a double */
12634     SV *sv = NULL;                      /* place to put the converted number */
12635     bool floatit;                       /* boolean: int or float? */
12636     const char *lastub = NULL;          /* position of last underbar */
12637     static char const number_too_long[] = "Number too long";
12638
12639     PERL_ARGS_ASSERT_SCAN_NUM;
12640
12641     /* We use the first character to decide what type of number this is */
12642
12643     switch (*s) {
12644     default:
12645       Perl_croak(aTHX_ "panic: scan_num");
12646
12647     /* if it starts with a 0, it could be an octal number, a decimal in
12648        0.13 disguise, or a hexadecimal number, or a binary number. */
12649     case '0':
12650         {
12651           /* variables:
12652              u          holds the "number so far"
12653              shift      the power of 2 of the base
12654                         (hex == 4, octal == 3, binary == 1)
12655              overflowed was the number more than we can hold?
12656
12657              Shift is used when we add a digit.  It also serves as an "are
12658              we in octal/hex/binary?" indicator to disallow hex characters
12659              when in octal mode.
12660            */
12661             NV n = 0.0;
12662             UV u = 0;
12663             I32 shift;
12664             bool overflowed = FALSE;
12665             bool just_zero  = TRUE;     /* just plain 0 or binary number? */
12666             static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
12667             static const char* const bases[5] =
12668               { "", "binary", "", "octal", "hexadecimal" };
12669             static const char* const Bases[5] =
12670               { "", "Binary", "", "Octal", "Hexadecimal" };
12671             static const char* const maxima[5] =
12672               { "",
12673                 "0b11111111111111111111111111111111",
12674                 "",
12675                 "037777777777",
12676                 "0xffffffff" };
12677             const char *base, *Base, *max;
12678
12679             /* check for hex */
12680             if (s[1] == 'x') {
12681                 shift = 4;
12682                 s += 2;
12683                 just_zero = FALSE;
12684             } else if (s[1] == 'b') {
12685                 shift = 1;
12686                 s += 2;
12687                 just_zero = FALSE;
12688             }
12689             /* check for a decimal in disguise */
12690             else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
12691                 goto decimal;
12692             /* so it must be octal */
12693             else {
12694                 shift = 3;
12695                 s++;
12696             }
12697
12698             if (*s == '_') {
12699                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12700                                "Misplaced _ in number");
12701                lastub = s++;
12702             }
12703
12704             base = bases[shift];
12705             Base = Bases[shift];
12706             max  = maxima[shift];
12707
12708             /* read the rest of the number */
12709             for (;;) {
12710                 /* x is used in the overflow test,
12711                    b is the digit we're adding on. */
12712                 UV x, b;
12713
12714                 switch (*s) {
12715
12716                 /* if we don't mention it, we're done */
12717                 default:
12718                     goto out;
12719
12720                 /* _ are ignored -- but warned about if consecutive */
12721                 case '_':
12722                     if (lastub && s == lastub + 1)
12723                         Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12724                                        "Misplaced _ in number");
12725                     lastub = s++;
12726                     break;
12727
12728                 /* 8 and 9 are not octal */
12729                 case '8': case '9':
12730                     if (shift == 3)
12731                         yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
12732                     /* FALL THROUGH */
12733
12734                 /* octal digits */
12735                 case '2': case '3': case '4':
12736                 case '5': case '6': case '7':
12737                     if (shift == 1)
12738                         yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
12739                     /* FALL THROUGH */
12740
12741                 case '0': case '1':
12742                     b = *s++ & 15;              /* ASCII digit -> value of digit */
12743                     goto digit;
12744
12745                 /* hex digits */
12746                 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
12747                 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
12748                     /* make sure they said 0x */
12749                     if (shift != 4)
12750                         goto out;
12751                     b = (*s++ & 7) + 9;
12752
12753                     /* Prepare to put the digit we have onto the end
12754                        of the number so far.  We check for overflows.
12755                     */
12756
12757                   digit:
12758                     just_zero = FALSE;
12759                     if (!overflowed) {
12760                         x = u << shift; /* make room for the digit */
12761
12762                         if ((x >> shift) != u
12763                             && !(PL_hints & HINT_NEW_BINARY)) {
12764                             overflowed = TRUE;
12765                             n = (NV) u;
12766                             Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
12767                                              "Integer overflow in %s number",
12768                                              base);
12769                         } else
12770                             u = x | b;          /* add the digit to the end */
12771                     }
12772                     if (overflowed) {
12773                         n *= nvshift[shift];
12774                         /* If an NV has not enough bits in its
12775                          * mantissa to represent an UV this summing of
12776                          * small low-order numbers is a waste of time
12777                          * (because the NV cannot preserve the
12778                          * low-order bits anyway): we could just
12779                          * remember when did we overflow and in the
12780                          * end just multiply n by the right
12781                          * amount. */
12782                         n += (NV) b;
12783                     }
12784                     break;
12785                 }
12786             }
12787
12788           /* if we get here, we had success: make a scalar value from
12789              the number.
12790           */
12791           out:
12792
12793             /* final misplaced underbar check */
12794             if (s[-1] == '_') {
12795                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
12796             }
12797
12798             sv = newSV(0);
12799             if (overflowed) {
12800                 if (n > 4294967295.0)
12801                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
12802                                    "%s number > %s non-portable",
12803                                    Base, max);
12804                 sv_setnv(sv, n);
12805             }
12806             else {
12807 #if UVSIZE > 4
12808                 if (u > 0xffffffff)
12809                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
12810                                    "%s number > %s non-portable",
12811                                    Base, max);
12812 #endif
12813                 sv_setuv(sv, u);
12814             }
12815             if (just_zero && (PL_hints & HINT_NEW_INTEGER))
12816                 sv = new_constant(start, s - start, "integer",
12817                                   sv, NULL, NULL, 0);
12818             else if (PL_hints & HINT_NEW_BINARY)
12819                 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
12820         }
12821         break;
12822
12823     /*
12824       handle decimal numbers.
12825       we're also sent here when we read a 0 as the first digit
12826     */
12827     case '1': case '2': case '3': case '4': case '5':
12828     case '6': case '7': case '8': case '9': case '.':
12829       decimal:
12830         d = PL_tokenbuf;
12831         e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
12832         floatit = FALSE;
12833
12834         /* read next group of digits and _ and copy into d */
12835         while (isDIGIT(*s) || *s == '_') {
12836             /* skip underscores, checking for misplaced ones
12837                if -w is on
12838             */
12839             if (*s == '_') {
12840                 if (lastub && s == lastub + 1)
12841                     Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12842                                    "Misplaced _ in number");
12843                 lastub = s++;
12844             }
12845             else {
12846                 /* check for end of fixed-length buffer */
12847                 if (d >= e)
12848                     Perl_croak(aTHX_ number_too_long);
12849                 /* if we're ok, copy the character */
12850                 *d++ = *s++;
12851             }
12852         }
12853
12854         /* final misplaced underbar check */
12855         if (lastub && s == lastub + 1) {
12856             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
12857         }
12858
12859         /* read a decimal portion if there is one.  avoid
12860            3..5 being interpreted as the number 3. followed
12861            by .5
12862         */
12863         if (*s == '.' && s[1] != '.') {
12864             floatit = TRUE;
12865             *d++ = *s++;
12866
12867             if (*s == '_') {
12868                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12869                                "Misplaced _ in number");
12870                 lastub = s;
12871             }
12872
12873             /* copy, ignoring underbars, until we run out of digits.
12874             */
12875             for (; isDIGIT(*s) || *s == '_'; s++) {
12876                 /* fixed length buffer check */
12877                 if (d >= e)
12878                     Perl_croak(aTHX_ number_too_long);
12879                 if (*s == '_') {
12880                    if (lastub && s == lastub + 1)
12881                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12882                                       "Misplaced _ in number");
12883                    lastub = s;
12884                 }
12885                 else
12886                     *d++ = *s;
12887             }
12888             /* fractional part ending in underbar? */
12889             if (s[-1] == '_') {
12890                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12891                                "Misplaced _ in number");
12892             }
12893             if (*s == '.' && isDIGIT(s[1])) {
12894                 /* oops, it's really a v-string, but without the "v" */
12895                 s = start;
12896                 goto vstring;
12897             }
12898         }
12899
12900         /* read exponent part, if present */
12901         if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
12902             floatit = TRUE;
12903             s++;
12904
12905             /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
12906             *d++ = 'e';         /* At least some Mach atof()s don't grok 'E' */
12907
12908             /* stray preinitial _ */
12909             if (*s == '_') {
12910                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12911                                "Misplaced _ in number");
12912                 lastub = s++;
12913             }
12914
12915             /* allow positive or negative exponent */
12916             if (*s == '+' || *s == '-')
12917                 *d++ = *s++;
12918
12919             /* stray initial _ */
12920             if (*s == '_') {
12921                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12922                                "Misplaced _ in number");
12923                 lastub = s++;
12924             }
12925
12926             /* read digits of exponent */
12927             while (isDIGIT(*s) || *s == '_') {
12928                 if (isDIGIT(*s)) {
12929                     if (d >= e)
12930                         Perl_croak(aTHX_ number_too_long);
12931                     *d++ = *s++;
12932                 }
12933                 else {
12934                    if (((lastub && s == lastub + 1) ||
12935                         (!isDIGIT(s[1]) && s[1] != '_')))
12936                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12937                                       "Misplaced _ in number");
12938                    lastub = s++;
12939                 }
12940             }
12941         }
12942
12943
12944         /* make an sv from the string */
12945         sv = newSV(0);
12946
12947         /*
12948            We try to do an integer conversion first if no characters
12949            indicating "float" have been found.
12950          */
12951
12952         if (!floatit) {
12953             UV uv;
12954             const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
12955
12956             if (flags == IS_NUMBER_IN_UV) {
12957               if (uv <= IV_MAX)
12958                 sv_setiv(sv, uv); /* Prefer IVs over UVs. */
12959               else
12960                 sv_setuv(sv, uv);
12961             } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
12962               if (uv <= (UV) IV_MIN)
12963                 sv_setiv(sv, -(IV)uv);
12964               else
12965                 floatit = TRUE;
12966             } else
12967               floatit = TRUE;
12968         }
12969         if (floatit) {
12970             /* terminate the string */
12971             *d = '\0';
12972             nv = Atof(PL_tokenbuf);
12973             sv_setnv(sv, nv);
12974         }
12975
12976         if ( floatit
12977              ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
12978             const char *const key = floatit ? "float" : "integer";
12979             const STRLEN keylen = floatit ? 5 : 7;
12980             sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
12981                                 key, keylen, sv, NULL, NULL, 0);
12982         }
12983         break;
12984
12985     /* if it starts with a v, it could be a v-string */
12986     case 'v':
12987 vstring:
12988                 sv = newSV(5); /* preallocate storage space */
12989                 s = scan_vstring(s, PL_bufend, sv);
12990         break;
12991     }
12992
12993     /* make the op for the constant and return */
12994
12995     if (sv)
12996         lvalp->opval = newSVOP(OP_CONST, 0, sv);
12997     else
12998         lvalp->opval = NULL;
12999
13000     return (char *)s;
13001 }
13002
13003 STATIC char *
13004 S_scan_formline(pTHX_ register char *s)
13005 {
13006     dVAR;
13007     register char *eol;
13008     register char *t;
13009     SV * const stuff = newSVpvs("");
13010     bool needargs = FALSE;
13011     bool eofmt = FALSE;
13012 #ifdef PERL_MAD
13013     char *tokenstart = s;
13014     SV* savewhite = NULL;
13015
13016     if (PL_madskills) {
13017         savewhite = PL_thiswhite;
13018         PL_thiswhite = 0;
13019     }
13020 #endif
13021
13022     PERL_ARGS_ASSERT_SCAN_FORMLINE;
13023
13024     while (!needargs) {
13025         if (*s == '.') {
13026             t = s+1;
13027 #ifdef PERL_STRICT_CR
13028             while (SPACE_OR_TAB(*t))
13029                 t++;
13030 #else
13031             while (SPACE_OR_TAB(*t) || *t == '\r')
13032                 t++;
13033 #endif
13034             if (*t == '\n' || t == PL_bufend) {
13035                 eofmt = TRUE;
13036                 break;
13037             }
13038         }
13039         if (PL_in_eval && !PL_rsfp) {
13040             eol = (char *) memchr(s,'\n',PL_bufend-s);
13041             if (!eol++)
13042                 eol = PL_bufend;
13043         }
13044         else
13045             eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
13046         if (*s != '#') {
13047             for (t = s; t < eol; t++) {
13048                 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
13049                     needargs = FALSE;
13050                     goto enough;        /* ~~ must be first line in formline */
13051                 }
13052                 if (*t == '@' || *t == '^')
13053                     needargs = TRUE;
13054             }
13055             if (eol > s) {
13056                 sv_catpvn(stuff, s, eol-s);
13057 #ifndef PERL_STRICT_CR
13058                 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
13059                     char *end = SvPVX(stuff) + SvCUR(stuff);
13060                     end[-2] = '\n';
13061                     end[-1] = '\0';
13062                     SvCUR_set(stuff, SvCUR(stuff) - 1);
13063                 }
13064 #endif
13065             }
13066             else
13067               break;
13068         }
13069         s = (char*)eol;
13070         if (PL_rsfp) {
13071             bool got_some;
13072 #ifdef PERL_MAD
13073             if (PL_madskills) {
13074                 if (PL_thistoken)
13075                     sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
13076                 else
13077                     PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
13078             }
13079 #endif
13080             PL_bufptr = PL_bufend;
13081             CopLINE_inc(PL_curcop);
13082             got_some = lex_next_chunk(0);
13083             CopLINE_dec(PL_curcop);
13084             s = PL_bufptr;
13085 #ifdef PERL_MAD
13086             tokenstart = PL_bufptr;
13087 #endif
13088             if (!got_some)
13089                 break;
13090         }
13091         incline(s);
13092     }
13093   enough:
13094     if (SvCUR(stuff)) {
13095         PL_expect = XTERM;
13096         if (needargs) {
13097             PL_lex_state = LEX_NORMAL;
13098             start_force(PL_curforce);
13099             NEXTVAL_NEXTTOKE.ival = 0;
13100             force_next(',');
13101         }
13102         else
13103             PL_lex_state = LEX_FORMLINE;
13104         if (!IN_BYTES) {
13105             if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
13106                 SvUTF8_on(stuff);
13107             else if (PL_encoding)
13108                 sv_recode_to_utf8(stuff, PL_encoding);
13109         }
13110         start_force(PL_curforce);
13111         NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
13112         force_next(THING);
13113         start_force(PL_curforce);
13114         NEXTVAL_NEXTTOKE.ival = OP_FORMLINE;
13115         force_next(LSTOP);
13116     }
13117     else {
13118         SvREFCNT_dec(stuff);
13119         if (eofmt)
13120             PL_lex_formbrack = 0;
13121         PL_bufptr = s;
13122     }
13123 #ifdef PERL_MAD
13124     if (PL_madskills) {
13125         if (PL_thistoken)
13126             sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
13127         else
13128             PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
13129         PL_thiswhite = savewhite;
13130     }
13131 #endif
13132     return s;
13133 }
13134
13135 I32
13136 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
13137 {
13138     dVAR;
13139     const I32 oldsavestack_ix = PL_savestack_ix;
13140     CV* const outsidecv = PL_compcv;
13141
13142     if (PL_compcv) {
13143         assert(SvTYPE(PL_compcv) == SVt_PVCV);
13144     }
13145     SAVEI32(PL_subline);
13146     save_item(PL_subname);
13147     SAVESPTR(PL_compcv);
13148
13149     PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
13150     CvFLAGS(PL_compcv) |= flags;
13151
13152     PL_subline = CopLINE(PL_curcop);
13153     CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
13154     CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
13155     CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
13156
13157     return oldsavestack_ix;
13158 }
13159
13160 #ifdef __SC__
13161 #pragma segment Perl_yylex
13162 #endif
13163 static int
13164 S_yywarn(pTHX_ const char *const s)
13165 {
13166     dVAR;
13167
13168     PERL_ARGS_ASSERT_YYWARN;
13169
13170     PL_in_eval |= EVAL_WARNONLY;
13171     yyerror(s);
13172     PL_in_eval &= ~EVAL_WARNONLY;
13173     return 0;
13174 }
13175
13176 int
13177 Perl_yyerror(pTHX_ const char *const s)
13178 {
13179     dVAR;
13180     const char *where = NULL;
13181     const char *context = NULL;
13182     int contlen = -1;
13183     SV *msg;
13184     int yychar  = PL_parser->yychar;
13185
13186     PERL_ARGS_ASSERT_YYERROR;
13187
13188     if (!yychar || (yychar == ';' && !PL_rsfp))
13189         where = "at EOF";
13190     else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
13191       PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
13192       PL_oldbufptr != PL_bufptr) {
13193         /*
13194                 Only for NetWare:
13195                 The code below is removed for NetWare because it abends/crashes on NetWare
13196                 when the script has error such as not having the closing quotes like:
13197                     if ($var eq "value)
13198                 Checking of white spaces is anyway done in NetWare code.
13199         */
13200 #ifndef NETWARE
13201         while (isSPACE(*PL_oldoldbufptr))
13202             PL_oldoldbufptr++;
13203 #endif
13204         context = PL_oldoldbufptr;
13205         contlen = PL_bufptr - PL_oldoldbufptr;
13206     }
13207     else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
13208       PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
13209         /*
13210                 Only for NetWare:
13211                 The code below is removed for NetWare because it abends/crashes on NetWare
13212                 when the script has error such as not having the closing quotes like:
13213                     if ($var eq "value)
13214                 Checking of white spaces is anyway done in NetWare code.
13215         */
13216 #ifndef NETWARE
13217         while (isSPACE(*PL_oldbufptr))
13218             PL_oldbufptr++;
13219 #endif
13220         context = PL_oldbufptr;
13221         contlen = PL_bufptr - PL_oldbufptr;
13222     }
13223     else if (yychar > 255)
13224         where = "next token ???";
13225     else if (yychar == -2) { /* YYEMPTY */
13226         if (PL_lex_state == LEX_NORMAL ||
13227            (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
13228             where = "at end of line";
13229         else if (PL_lex_inpat)
13230             where = "within pattern";
13231         else
13232             where = "within string";
13233     }
13234     else {
13235         SV * const where_sv = newSVpvs_flags("next char ", SVs_TEMP);
13236         if (yychar < 32)
13237             Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
13238         else if (isPRINT_LC(yychar)) {
13239             const char string = yychar;
13240             sv_catpvn(where_sv, &string, 1);
13241         }
13242         else
13243             Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
13244         where = SvPVX_const(where_sv);
13245     }
13246     msg = sv_2mortal(newSVpv(s, 0));
13247     Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
13248         OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
13249     if (context)
13250         Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
13251     else
13252         Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
13253     if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
13254         Perl_sv_catpvf(aTHX_ msg,
13255         "  (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
13256                 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
13257         PL_multi_end = 0;
13258     }
13259     if (PL_in_eval & EVAL_WARNONLY) {
13260         Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
13261     }
13262     else
13263         qerror(msg);
13264     if (PL_error_count >= 10) {
13265         if (PL_in_eval && SvCUR(ERRSV))
13266             Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
13267                        SVfARG(ERRSV), OutCopFILE(PL_curcop));
13268         else
13269             Perl_croak(aTHX_ "%s has too many errors.\n",
13270             OutCopFILE(PL_curcop));
13271     }
13272     PL_in_my = 0;
13273     PL_in_my_stash = NULL;
13274     return 0;
13275 }
13276 #ifdef __SC__
13277 #pragma segment Main
13278 #endif
13279
13280 STATIC char*
13281 S_swallow_bom(pTHX_ U8 *s)
13282 {
13283     dVAR;
13284     const STRLEN slen = SvCUR(PL_linestr);
13285
13286     PERL_ARGS_ASSERT_SWALLOW_BOM;
13287
13288     switch (s[0]) {
13289     case 0xFF:
13290         if (s[1] == 0xFE) {
13291             /* UTF-16 little-endian? (or UTF-32LE?) */
13292             if (s[2] == 0 && s[3] == 0)  /* UTF-32 little-endian */
13293                 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
13294 #ifndef PERL_NO_UTF16_FILTER
13295             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
13296             s += 2;
13297             if (PL_bufend > (char*)s) {
13298                 s = add_utf16_textfilter(s, TRUE);
13299             }
13300 #else
13301             Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
13302 #endif
13303         }
13304         break;
13305     case 0xFE:
13306         if (s[1] == 0xFF) {   /* UTF-16 big-endian? */
13307 #ifndef PERL_NO_UTF16_FILTER
13308             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
13309             s += 2;
13310             if (PL_bufend > (char *)s) {
13311                 s = add_utf16_textfilter(s, FALSE);
13312             }
13313 #else
13314             Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
13315 #endif
13316         }
13317         break;
13318     case 0xEF:
13319         if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
13320             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
13321             s += 3;                      /* UTF-8 */
13322         }
13323         break;
13324     case 0:
13325         if (slen > 3) {
13326              if (s[1] == 0) {
13327                   if (s[2] == 0xFE && s[3] == 0xFF) {
13328                        /* UTF-32 big-endian */
13329                        Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
13330                   }
13331              }
13332              else if (s[2] == 0 && s[3] != 0) {
13333                   /* Leading bytes
13334                    * 00 xx 00 xx
13335                    * are a good indicator of UTF-16BE. */
13336 #ifndef PERL_NO_UTF16_FILTER
13337                   if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
13338                   s = add_utf16_textfilter(s, FALSE);
13339 #else
13340                   Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
13341 #endif
13342              }
13343         }
13344 #ifdef EBCDIC
13345     case 0xDD:
13346         if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
13347             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
13348             s += 4;                      /* UTF-8 */
13349         }
13350         break;
13351 #endif
13352
13353     default:
13354          if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
13355                   /* Leading bytes
13356                    * xx 00 xx 00
13357                    * are a good indicator of UTF-16LE. */
13358 #ifndef PERL_NO_UTF16_FILTER
13359               if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
13360               s = add_utf16_textfilter(s, TRUE);
13361 #else
13362               Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
13363 #endif
13364          }
13365     }
13366     return (char*)s;
13367 }
13368
13369
13370 #ifndef PERL_NO_UTF16_FILTER
13371 static I32
13372 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
13373 {
13374     dVAR;
13375     SV *const filter = FILTER_DATA(idx);
13376     /* We re-use this each time round, throwing the contents away before we
13377        return.  */
13378     SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
13379     SV *const utf8_buffer = filter;
13380     IV status = IoPAGE(filter);
13381     const bool reverse = (bool) IoLINES(filter);
13382     I32 retval;
13383
13384     /* As we're automatically added, at the lowest level, and hence only called
13385        from this file, we can be sure that we're not called in block mode. Hence
13386        don't bother writing code to deal with block mode.  */
13387     if (maxlen) {
13388         Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
13389     }
13390     if (status < 0) {
13391         Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
13392     }
13393     DEBUG_P(PerlIO_printf(Perl_debug_log,
13394                           "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
13395                           FPTR2DPTR(void *, S_utf16_textfilter),
13396                           reverse ? 'l' : 'b', idx, maxlen, status,
13397                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
13398
13399     while (1) {
13400         STRLEN chars;
13401         STRLEN have;
13402         I32 newlen;
13403         U8 *end;
13404         /* First, look in our buffer of existing UTF-8 data:  */
13405         char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
13406
13407         if (nl) {
13408             ++nl;
13409         } else if (status == 0) {
13410             /* EOF */
13411             IoPAGE(filter) = 0;
13412             nl = SvEND(utf8_buffer);
13413         }
13414         if (nl) {
13415             STRLEN got = nl - SvPVX(utf8_buffer);
13416             /* Did we have anything to append?  */
13417             retval = got != 0;
13418             sv_catpvn(sv, SvPVX(utf8_buffer), got);
13419             /* Everything else in this code works just fine if SVp_POK isn't
13420                set.  This, however, needs it, and we need it to work, else
13421                we loop infinitely because the buffer is never consumed.  */
13422             sv_chop(utf8_buffer, nl);
13423             break;
13424         }
13425
13426         /* OK, not a complete line there, so need to read some more UTF-16.
13427            Read an extra octect if the buffer currently has an odd number. */
13428         while (1) {
13429             if (status <= 0)
13430                 break;
13431             if (SvCUR(utf16_buffer) >= 2) {
13432                 /* Location of the high octet of the last complete code point.
13433                    Gosh, UTF-16 is a pain. All the benefits of variable length,
13434                    *coupled* with all the benefits of partial reads and
13435                    endianness.  */
13436                 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
13437                     + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
13438
13439                 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
13440                     break;
13441                 }
13442
13443                 /* We have the first half of a surrogate. Read more.  */
13444                 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
13445             }
13446
13447             status = FILTER_READ(idx + 1, utf16_buffer,
13448                                  160 + (SvCUR(utf16_buffer) & 1));
13449             DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
13450             DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
13451             if (status < 0) {
13452                 /* Error */
13453                 IoPAGE(filter) = status;
13454                 return status;
13455             }
13456         }
13457
13458         chars = SvCUR(utf16_buffer) >> 1;
13459         have = SvCUR(utf8_buffer);
13460         SvGROW(utf8_buffer, have + chars * 3 + 1);
13461
13462         if (reverse) {
13463             end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
13464                                          (U8*)SvPVX_const(utf8_buffer) + have,
13465                                          chars * 2, &newlen);
13466         } else {
13467             end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
13468                                 (U8*)SvPVX_const(utf8_buffer) + have,
13469                                 chars * 2, &newlen);
13470         }
13471         SvCUR_set(utf8_buffer, have + newlen);
13472         *end = '\0';
13473
13474         /* No need to keep this SV "well-formed" with a '\0' after the end, as
13475            it's private to us, and utf16_to_utf8{,reversed} take a
13476            (pointer,length) pair, rather than a NUL-terminated string.  */
13477         if(SvCUR(utf16_buffer) & 1) {
13478             *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
13479             SvCUR_set(utf16_buffer, 1);
13480         } else {
13481             SvCUR_set(utf16_buffer, 0);
13482         }
13483     }
13484     DEBUG_P(PerlIO_printf(Perl_debug_log,
13485                           "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
13486                           status,
13487                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
13488     DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
13489     return retval;
13490 }
13491
13492 static U8 *
13493 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
13494 {
13495     SV *filter = filter_add(S_utf16_textfilter, NULL);
13496
13497     IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
13498     sv_setpvs(filter, "");
13499     IoLINES(filter) = reversed;
13500     IoPAGE(filter) = 1; /* Not EOF */
13501
13502     /* Sadly, we have to return a valid pointer, come what may, so we have to
13503        ignore any error return from this.  */
13504     SvCUR_set(PL_linestr, 0);
13505     if (FILTER_READ(0, PL_linestr, 0)) {
13506         SvUTF8_on(PL_linestr);
13507     } else {
13508         SvUTF8_on(PL_linestr);
13509     }
13510     PL_bufend = SvEND(PL_linestr);
13511     return (U8*)SvPVX(PL_linestr);
13512 }
13513 #endif
13514
13515 /*
13516 Returns a pointer to the next character after the parsed
13517 vstring, as well as updating the passed in sv.
13518
13519 Function must be called like
13520
13521         sv = newSV(5);
13522         s = scan_vstring(s,e,sv);
13523
13524 where s and e are the start and end of the string.
13525 The sv should already be large enough to store the vstring
13526 passed in, for performance reasons.
13527
13528 */
13529
13530 char *
13531 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
13532 {
13533     dVAR;
13534     const char *pos = s;
13535     const char *start = s;
13536
13537     PERL_ARGS_ASSERT_SCAN_VSTRING;
13538
13539     if (*pos == 'v') pos++;  /* get past 'v' */
13540     while (pos < e && (isDIGIT(*pos) || *pos == '_'))
13541         pos++;
13542     if ( *pos != '.') {
13543         /* this may not be a v-string if followed by => */
13544         const char *next = pos;
13545         while (next < e && isSPACE(*next))
13546             ++next;
13547         if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
13548             /* return string not v-string */
13549             sv_setpvn(sv,(char *)s,pos-s);
13550             return (char *)pos;
13551         }
13552     }
13553
13554     if (!isALPHA(*pos)) {
13555         U8 tmpbuf[UTF8_MAXBYTES+1];
13556
13557         if (*s == 'v')
13558             s++;  /* get past 'v' */
13559
13560         sv_setpvs(sv, "");
13561
13562         for (;;) {
13563             /* this is atoi() that tolerates underscores */
13564             U8 *tmpend;
13565             UV rev = 0;
13566             const char *end = pos;
13567             UV mult = 1;
13568             while (--end >= s) {
13569                 if (*end != '_') {
13570                     const UV orev = rev;
13571                     rev += (*end - '0') * mult;
13572                     mult *= 10;
13573                     if (orev > rev)
13574                         Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
13575                                          "Integer overflow in decimal number");
13576                 }
13577             }
13578 #ifdef EBCDIC
13579             if (rev > 0x7FFFFFFF)
13580                  Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
13581 #endif
13582             /* Append native character for the rev point */
13583             tmpend = uvchr_to_utf8(tmpbuf, rev);
13584             sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
13585             if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
13586                  SvUTF8_on(sv);
13587             if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
13588                  s = ++pos;
13589             else {
13590                  s = pos;
13591                  break;
13592             }
13593             while (pos < e && (isDIGIT(*pos) || *pos == '_'))
13594                  pos++;
13595         }
13596         SvPOK_on(sv);
13597         sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
13598         SvRMAGICAL_on(sv);
13599     }
13600     return (char *)s;
13601 }
13602
13603 int
13604 Perl_keyword_plugin_standard(pTHX_
13605         char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
13606 {
13607     PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
13608     PERL_UNUSED_CONTEXT;
13609     PERL_UNUSED_ARG(keyword_ptr);
13610     PERL_UNUSED_ARG(keyword_len);
13611     PERL_UNUSED_ARG(op_ptr);
13612     return KEYWORD_PLUGIN_DECLINE;
13613 }
13614
13615 /*
13616  * Local variables:
13617  * c-indentation-style: bsd
13618  * c-basic-offset: 4
13619  * indent-tabs-mode: t
13620  * End:
13621  *
13622  * ex: set ts=8 sts=4 sw=4 noet:
13623  */