toke.c

   1 /*    toke.c
   2  *
   3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   4  *    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
   5  *
   6  *    You may distribute under the terms of either the GNU General Public
   7  *    License or the Artistic License, as specified in the README file.
   8  *
   9  */
  10
  11 /*
  12  *  'It all comes from here, the stench and the peril.'    --Frodo
  13  *
  14  *     [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
  15  */
  16
  17 /*
  18  * This file is the lexer for Perl.  It's closely linked to the
  19  * parser, perly.y.
  20  *
  21  * The main routine is yylex(), which returns the next token.
  22  */
  23
  24 /*
  25 =head1 Lexer interface
  26
  27 This is the lower layer of the Perl parser, managing characters and tokens.
  28
  29 =for apidoc AmU|yy_parser *|PL_parser
  30
  31 Pointer to a structure encapsulating the state of the parsing operation
  32 currently in progress.  The pointer can be locally changed to perform
  33 a nested parse without interfering with the state of an outer parse.
  34 Individual members of C<PL_parser> have their own documentation.
  35
  36 =cut
  37 */
  38
  39 #include "EXTERN.h"
  40 #define PERL_IN_TOKE_C
  41 #include "perl.h"
  42
  43 #define new_constant(a,b,c,d,e,f,g)     \
  44         S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
  45
  46 #define pl_yylval       (PL_parser->yylval)
  47
  48 /* YYINITDEPTH -- initial size of the parser's stacks.  */
  49 #define YYINITDEPTH 200
  50
  51 /* XXX temporary backwards compatibility */
  52 #define PL_lex_brackets         (PL_parser->lex_brackets)
  53 #define PL_lex_brackstack       (PL_parser->lex_brackstack)
  54 #define PL_lex_casemods         (PL_parser->lex_casemods)
  55 #define PL_lex_casestack        (PL_parser->lex_casestack)
  56 #define PL_lex_defer            (PL_parser->lex_defer)
  57 #define PL_lex_dojoin           (PL_parser->lex_dojoin)
  58 #define PL_lex_expect           (PL_parser->lex_expect)
  59 #define PL_lex_formbrack        (PL_parser->lex_formbrack)
  60 #define PL_lex_inpat            (PL_parser->lex_inpat)
  61 #define PL_lex_inwhat           (PL_parser->lex_inwhat)
  62 #define PL_lex_op               (PL_parser->lex_op)
  63 #define PL_lex_repl             (PL_parser->lex_repl)
  64 #define PL_lex_starts           (PL_parser->lex_starts)
  65 #define PL_lex_stuff            (PL_parser->lex_stuff)
  66 #define PL_multi_start          (PL_parser->multi_start)
  67 #define PL_multi_open           (PL_parser->multi_open)
  68 #define PL_multi_close          (PL_parser->multi_close)
  69 #define PL_pending_ident        (PL_parser->pending_ident)
  70 #define PL_preambled            (PL_parser->preambled)
  71 #define PL_sublex_info          (PL_parser->sublex_info)
  72 #define PL_linestr              (PL_parser->linestr)
  73 #define PL_expect               (PL_parser->expect)
  74 #define PL_copline              (PL_parser->copline)
  75 #define PL_bufptr               (PL_parser->bufptr)
  76 #define PL_oldbufptr            (PL_parser->oldbufptr)
  77 #define PL_oldoldbufptr         (PL_parser->oldoldbufptr)
  78 #define PL_linestart            (PL_parser->linestart)
  79 #define PL_bufend               (PL_parser->bufend)
  80 #define PL_last_uni             (PL_parser->last_uni)
  81 #define PL_last_lop             (PL_parser->last_lop)
  82 #define PL_last_lop_op          (PL_parser->last_lop_op)
  83 #define PL_lex_state            (PL_parser->lex_state)
  84 #define PL_rsfp                 (PL_parser->rsfp)
  85 #define PL_rsfp_filters         (PL_parser->rsfp_filters)
  86 #define PL_in_my                (PL_parser->in_my)
  87 #define PL_in_my_stash          (PL_parser->in_my_stash)
  88 #define PL_tokenbuf             (PL_parser->tokenbuf)
  89 #define PL_multi_end            (PL_parser->multi_end)
  90 #define PL_error_count          (PL_parser->error_count)
  91
  92 #ifdef PERL_MAD
  93 #  define PL_endwhite           (PL_parser->endwhite)
  94 #  define PL_faketokens         (PL_parser->faketokens)
  95 #  define PL_lasttoke           (PL_parser->lasttoke)
  96 #  define PL_nextwhite          (PL_parser->nextwhite)
  97 #  define PL_realtokenstart     (PL_parser->realtokenstart)
  98 #  define PL_skipwhite          (PL_parser->skipwhite)
  99 #  define PL_thisclose          (PL_parser->thisclose)
 100 #  define PL_thismad            (PL_parser->thismad)
 101 #  define PL_thisopen           (PL_parser->thisopen)
 102 #  define PL_thisstuff          (PL_parser->thisstuff)
 103 #  define PL_thistoken          (PL_parser->thistoken)
 104 #  define PL_thiswhite          (PL_parser->thiswhite)
 105 #  define PL_thiswhite          (PL_parser->thiswhite)
 106 #  define PL_nexttoke           (PL_parser->nexttoke)
 107 #  define PL_curforce           (PL_parser->curforce)
 108 #else
 109 #  define PL_nexttoke           (PL_parser->nexttoke)
 110 #  define PL_nexttype           (PL_parser->nexttype)
 111 #  define PL_nextval            (PL_parser->nextval)
 112 #endif
 113
 114 /* This can't be done with embed.fnc, because struct yy_parser contains a
 115    member named pending_ident, which clashes with the generated #define  */
 116 static int
 117 S_pending_ident(pTHX);
 118
 119 static const char ident_too_long[] = "Identifier too long";
 120
 121 #ifdef PERL_MAD
 122 #  define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
 123 #  define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
 124 #else
 125 #  define CURMAD(slot,sv)
 126 #  define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
 127 #endif
 128
 129 #define XFAKEBRACK 128
 130 #define XENUMMASK 127
 131
 132 #ifdef USE_UTF8_SCRIPTS
 133 #   define UTF (!IN_BYTES)
 134 #else
 135 #   define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
 136 #endif
 137
 138 /* The maximum number of characters preceding the unrecognized one to display */
 139 #define UNRECOGNIZED_PRECEDE_COUNT 10
 140
 141 /* In variables named $^X, these are the legal values for X.
 142  * 1999-02-27 mjd-perl-patch@plover.com */
 143 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
 144
 145 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
 146
 147 /* LEX_* are values for PL_lex_state, the state of the lexer.
 148  * They are arranged oddly so that the guard on the switch statement
 149  * can get by with a single comparison (if the compiler is smart enough).
 150  */
 151
 152 /* #define LEX_NOTPARSING               11 is done in perl.h. */
 153
 154 #define LEX_NORMAL              10 /* normal code (ie not within "...")     */
 155 #define LEX_INTERPNORMAL         9 /* code within a string, eg "$foo[$x+1]" */
 156 #define LEX_INTERPCASEMOD        8 /* expecting a \U, \Q or \E etc          */
 157 #define LEX_INTERPPUSH           7 /* starting a new sublex parse level     */
 158 #define LEX_INTERPSTART          6 /* expecting the start of a $var         */
 159
 160                                    /* at end of code, eg "$x" followed by:  */
 161 #define LEX_INTERPEND            5 /* ... eg not one of [, { or ->          */
 162 #define LEX_INTERPENDMAYBE       4 /* ... eg one of [, { or ->              */
 163
 164 #define LEX_INTERPCONCAT         3 /* expecting anything, eg at start of
 165                                         string or after \E, $foo, etc       */
 166 #define LEX_INTERPCONST          2 /* NOT USED */
 167 #define LEX_FORMLINE             1 /* expecting a format line               */
 168 #define LEX_KNOWNEXT             0 /* next token known; just return it      */
 169
 170
 171 #ifdef DEBUGGING
 172 static const char* const lex_state_names[] = {
 173     "KNOWNEXT",
 174     "FORMLINE",
 175     "INTERPCONST",
 176     "INTERPCONCAT",
 177     "INTERPENDMAYBE",
 178     "INTERPEND",
 179     "INTERPSTART",
 180     "INTERPPUSH",
 181     "INTERPCASEMOD",
 182     "INTERPNORMAL",
 183     "NORMAL"
 184 };
 185 #endif
 186
 187 #ifdef ff_next
 188 #undef ff_next
 189 #endif
 190
 191 #include "keywords.h"
 192
 193 /* CLINE is a macro that ensures PL_copline has a sane value */
 194
 195 #ifdef CLINE
 196 #undef CLINE
 197 #endif
 198 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
 199
 200 #ifdef PERL_MAD
 201 #  define SKIPSPACE0(s) skipspace0(s)
 202 #  define SKIPSPACE1(s) skipspace1(s)
 203 #  define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
 204 #  define PEEKSPACE(s) skipspace2(s,0)
 205 #else
 206 #  define SKIPSPACE0(s) skipspace(s)
 207 #  define SKIPSPACE1(s) skipspace(s)
 208 #  define SKIPSPACE2(s,tsv) skipspace(s)
 209 #  define PEEKSPACE(s) skipspace(s)
 210 #endif
 211
 212 /*
 213  * Convenience functions to return different tokens and prime the
 214  * lexer for the next token.  They all take an argument.
 215  *
 216  * TOKEN        : generic token (used for '(', DOLSHARP, etc)
 217  * OPERATOR     : generic operator
 218  * AOPERATOR    : assignment operator
 219  * PREBLOCK     : beginning the block after an if, while, foreach, ...
 220  * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
 221  * PREREF       : *EXPR where EXPR is not a simple identifier
 222  * TERM         : expression term
 223  * LOOPX        : loop exiting command (goto, last, dump, etc)
 224  * FTST         : file test operator
 225  * FUN0         : zero-argument function
 226  * FUN1         : not used, except for not, which isn't a UNIOP
 227  * BOop         : bitwise or or xor
 228  * BAop         : bitwise and
 229  * SHop         : shift operator
 230  * PWop         : power operator
 231  * PMop         : pattern-matching operator
 232  * Aop          : addition-level operator
 233  * Mop          : multiplication-level operator
 234  * Eop          : equality-testing operator
 235  * Rop          : relational operator <= != gt
 236  *
 237  * Also see LOP and lop() below.
 238  */
 239
 240 #ifdef DEBUGGING /* Serve -DT. */
 241 #   define REPORT(retval) tokereport((I32)retval, &pl_yylval)
 242 #else
 243 #   define REPORT(retval) (retval)
 244 #endif
 245
 246 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
 247 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
 248 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
 249 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
 250 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
 251 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
 252 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
 253 #define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
 254 #define FTST(f)  return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
 255 #define FUN0(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
 256 #define FUN1(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
 257 #define BOop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
 258 #define BAop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
 259 #define SHop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
 260 #define PWop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
 261 #define PMop(f)  return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
 262 #define Aop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
 263 #define Mop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
 264 #define Eop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
 265 #define Rop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
 266
 267 /* This bit of chicanery makes a unary function followed by
 268  * a parenthesis into a function with one argument, highest precedence.
 269  * The UNIDOR macro is for unary functions that can be followed by the //
 270  * operator (such as C<shift // 0>).
 271  */
 272 #define UNI2(f,x) { \
 273         pl_yylval.ival = f; \
 274         PL_expect = x; \
 275         PL_bufptr = s; \
 276         PL_last_uni = PL_oldbufptr; \
 277         PL_last_lop_op = f; \
 278         if (*s == '(') \
 279             return REPORT( (int)FUNC1 ); \
 280         s = PEEKSPACE(s); \
 281         return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
 282         }
 283 #define UNI(f)    UNI2(f,XTERM)
 284 #define UNIDOR(f) UNI2(f,XTERMORDORDOR)
 285
 286 #define UNIBRACK(f) { \
 287         pl_yylval.ival = f; \
 288         PL_bufptr = s; \
 289         PL_last_uni = PL_oldbufptr; \
 290         if (*s == '(') \
 291             return REPORT( (int)FUNC1 ); \
 292         s = PEEKSPACE(s); \
 293         return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
 294         }
 295
 296 /* grandfather return to old style */
 297 #define OLDLOP(f) return(pl_yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
 298
 299 #ifdef DEBUGGING
 300
 301 /* how to interpret the pl_yylval associated with the token */
 302 enum token_type {
 303     TOKENTYPE_NONE,
 304     TOKENTYPE_IVAL,
 305     TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
 306     TOKENTYPE_PVAL,
 307     TOKENTYPE_OPVAL,
 308     TOKENTYPE_GVVAL
 309 };
 310
 311 static struct debug_tokens {
 312     const int token;
 313     enum token_type type;
 314     const char *name;
 315 } const debug_tokens[] =
 316 {
 317     { ADDOP,            TOKENTYPE_OPNUM,        "ADDOP" },
 318     { ANDAND,           TOKENTYPE_NONE,         "ANDAND" },
 319     { ANDOP,            TOKENTYPE_NONE,         "ANDOP" },
 320     { ANONSUB,          TOKENTYPE_IVAL,         "ANONSUB" },
 321     { ARROW,            TOKENTYPE_NONE,         "ARROW" },
 322     { ASSIGNOP,         TOKENTYPE_OPNUM,        "ASSIGNOP" },
 323     { BITANDOP,         TOKENTYPE_OPNUM,        "BITANDOP" },
 324     { BITOROP,          TOKENTYPE_OPNUM,        "BITOROP" },
 325     { COLONATTR,        TOKENTYPE_NONE,         "COLONATTR" },
 326     { CONTINUE,         TOKENTYPE_NONE,         "CONTINUE" },
 327     { DEFAULT,          TOKENTYPE_NONE,         "DEFAULT" },
 328     { DO,               TOKENTYPE_NONE,         "DO" },
 329     { DOLSHARP,         TOKENTYPE_NONE,         "DOLSHARP" },
 330     { DORDOR,           TOKENTYPE_NONE,         "DORDOR" },
 331     { DOROP,            TOKENTYPE_OPNUM,        "DOROP" },
 332     { DOTDOT,           TOKENTYPE_IVAL,         "DOTDOT" },
 333     { ELSE,             TOKENTYPE_NONE,         "ELSE" },
 334     { ELSIF,            TOKENTYPE_IVAL,         "ELSIF" },
 335     { EQOP,             TOKENTYPE_OPNUM,        "EQOP" },
 336     { FOR,              TOKENTYPE_IVAL,         "FOR" },
 337     { FORMAT,           TOKENTYPE_NONE,         "FORMAT" },
 338     { FUNC,             TOKENTYPE_OPNUM,        "FUNC" },
 339     { FUNC0,            TOKENTYPE_OPNUM,        "FUNC0" },
 340     { FUNC0SUB,         TOKENTYPE_OPVAL,        "FUNC0SUB" },
 341     { FUNC1,            TOKENTYPE_OPNUM,        "FUNC1" },
 342     { FUNCMETH,         TOKENTYPE_OPVAL,        "FUNCMETH" },
 343     { GIVEN,            TOKENTYPE_IVAL,         "GIVEN" },
 344     { HASHBRACK,        TOKENTYPE_NONE,         "HASHBRACK" },
 345     { IF,               TOKENTYPE_IVAL,         "IF" },
 346     { LABEL,            TOKENTYPE_PVAL,         "LABEL" },
 347     { LOCAL,            TOKENTYPE_IVAL,         "LOCAL" },
 348     { LOOPEX,           TOKENTYPE_OPNUM,        "LOOPEX" },
 349     { LSTOP,            TOKENTYPE_OPNUM,        "LSTOP" },
 350     { LSTOPSUB,         TOKENTYPE_OPVAL,        "LSTOPSUB" },
 351     { MATCHOP,          TOKENTYPE_OPNUM,        "MATCHOP" },
 352     { METHOD,           TOKENTYPE_OPVAL,        "METHOD" },
 353     { MULOP,            TOKENTYPE_OPNUM,        "MULOP" },
 354     { MY,               TOKENTYPE_IVAL,         "MY" },
 355     { MYSUB,            TOKENTYPE_NONE,         "MYSUB" },
 356     { NOAMP,            TOKENTYPE_NONE,         "NOAMP" },
 357     { NOTOP,            TOKENTYPE_NONE,         "NOTOP" },
 358     { OROP,             TOKENTYPE_IVAL,         "OROP" },
 359     { OROR,             TOKENTYPE_NONE,         "OROR" },
 360     { PACKAGE,          TOKENTYPE_NONE,         "PACKAGE" },
 361     { PLUGEXPR,         TOKENTYPE_OPVAL,        "PLUGEXPR" },
 362     { PLUGSTMT,         TOKENTYPE_OPVAL,        "PLUGSTMT" },
 363     { PMFUNC,           TOKENTYPE_OPVAL,        "PMFUNC" },
 364     { POSTDEC,          TOKENTYPE_NONE,         "POSTDEC" },
 365     { POSTINC,          TOKENTYPE_NONE,         "POSTINC" },
 366     { POWOP,            TOKENTYPE_OPNUM,        "POWOP" },
 367     { PREDEC,           TOKENTYPE_NONE,         "PREDEC" },
 368     { PREINC,           TOKENTYPE_NONE,         "PREINC" },
 369     { PRIVATEREF,       TOKENTYPE_OPVAL,        "PRIVATEREF" },
 370     { REFGEN,           TOKENTYPE_NONE,         "REFGEN" },
 371     { RELOP,            TOKENTYPE_OPNUM,        "RELOP" },
 372     { SHIFTOP,          TOKENTYPE_OPNUM,        "SHIFTOP" },
 373     { SUB,              TOKENTYPE_NONE,         "SUB" },
 374     { THING,            TOKENTYPE_OPVAL,        "THING" },
 375     { UMINUS,           TOKENTYPE_NONE,         "UMINUS" },
 376     { UNIOP,            TOKENTYPE_OPNUM,        "UNIOP" },
 377     { UNIOPSUB,         TOKENTYPE_OPVAL,        "UNIOPSUB" },
 378     { UNLESS,           TOKENTYPE_IVAL,         "UNLESS" },
 379     { UNTIL,            TOKENTYPE_IVAL,         "UNTIL" },
 380     { USE,              TOKENTYPE_IVAL,         "USE" },
 381     { WHEN,             TOKENTYPE_IVAL,         "WHEN" },
 382     { WHILE,            TOKENTYPE_IVAL,         "WHILE" },
 383     { WORD,             TOKENTYPE_OPVAL,        "WORD" },
 384     { YADAYADA,         TOKENTYPE_IVAL,         "YADAYADA" },
 385     { 0,                TOKENTYPE_NONE,         NULL }
 386 };
 387
 388 /* dump the returned token in rv, plus any optional arg in pl_yylval */
 389
 390 STATIC int
 391 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
 392 {
 393     dVAR;
 394
 395     PERL_ARGS_ASSERT_TOKEREPORT;
 396
 397     if (DEBUG_T_TEST) {
 398         const char *name = NULL;
 399         enum token_type type = TOKENTYPE_NONE;
 400         const struct debug_tokens *p;
 401         SV* const report = newSVpvs("<== ");
 402
 403         for (p = debug_tokens; p->token; p++) {
 404             if (p->token == (int)rv) {
 405                 name = p->name;
 406                 type = p->type;
 407                 break;
 408             }
 409         }
 410         if (name)
 411             Perl_sv_catpv(aTHX_ report, name);
 412         else if ((char)rv > ' ' && (char)rv < '~')
 413             Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
 414         else if (!rv)
 415             sv_catpvs(report, "EOF");
 416         else
 417             Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
 418         switch (type) {
 419         case TOKENTYPE_NONE:
 420         case TOKENTYPE_GVVAL: /* doesn't appear to be used */
 421             break;
 422         case TOKENTYPE_IVAL:
 423             Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
 424             break;
 425         case TOKENTYPE_OPNUM:
 426             Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
 427                                     PL_op_name[lvalp->ival]);
 428             break;
 429         case TOKENTYPE_PVAL:
 430             Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
 431             break;
 432         case TOKENTYPE_OPVAL:
 433             if (lvalp->opval) {
 434                 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
 435                                     PL_op_name[lvalp->opval->op_type]);
 436                 if (lvalp->opval->op_type == OP_CONST) {
 437                     Perl_sv_catpvf(aTHX_ report, " %s",
 438                         SvPEEK(cSVOPx_sv(lvalp->opval)));
 439                 }
 440
 441             }
 442             else
 443                 sv_catpvs(report, "(opval=null)");
 444             break;
 445         }
 446         PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
 447     };
 448     return (int)rv;
 449 }
 450
 451
 452 /* print the buffer with suitable escapes */
 453
 454 STATIC void
 455 S_printbuf(pTHX_ const char *const fmt, const char *const s)
 456 {
 457     SV* const tmp = newSVpvs("");
 458
 459     PERL_ARGS_ASSERT_PRINTBUF;
 460
 461     PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
 462     SvREFCNT_dec(tmp);
 463 }
 464
 465 #endif
 466
 467 static int
 468 S_deprecate_commaless_var_list(pTHX) {
 469     PL_expect = XTERM;
 470     deprecate("comma-less variable list");
 471     return REPORT(','); /* grandfather non-comma-format format */
 472 }
 473
 474 /*
 475  * S_ao
 476  *
 477  * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
 478  * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
 479  */
 480
 481 STATIC int
 482 S_ao(pTHX_ int toketype)
 483 {
 484     dVAR;
 485     if (*PL_bufptr == '=') {
 486         PL_bufptr++;
 487         if (toketype == ANDAND)
 488             pl_yylval.ival = OP_ANDASSIGN;
 489         else if (toketype == OROR)
 490             pl_yylval.ival = OP_ORASSIGN;
 491         else if (toketype == DORDOR)
 492             pl_yylval.ival = OP_DORASSIGN;
 493         toketype = ASSIGNOP;
 494     }
 495     return toketype;
 496 }
 497
 498 /*
 499  * S_no_op
 500  * When Perl expects an operator and finds something else, no_op
 501  * prints the warning.  It always prints "<something> found where
 502  * operator expected.  It prints "Missing semicolon on previous line?"
 503  * if the surprise occurs at the start of the line.  "do you need to
 504  * predeclare ..." is printed out for code like "sub bar; foo bar $x"
 505  * where the compiler doesn't know if foo is a method call or a function.
 506  * It prints "Missing operator before end of line" if there's nothing
 507  * after the missing operator, or "... before <...>" if there is something
 508  * after the missing operator.
 509  */
 510
 511 STATIC void
 512 S_no_op(pTHX_ const char *const what, char *s)
 513 {
 514     dVAR;
 515     char * const oldbp = PL_bufptr;
 516     const bool is_first = (PL_oldbufptr == PL_linestart);
 517
 518     PERL_ARGS_ASSERT_NO_OP;
 519
 520     if (!s)
 521         s = oldbp;
 522     else
 523         PL_bufptr = s;
 524     yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
 525     if (ckWARN_d(WARN_SYNTAX)) {
 526         if (is_first)
 527             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 528                     "\t(Missing semicolon on previous line?)\n");
 529         else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
 530             const char *t;
 531             for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
 532                 NOOP;
 533             if (t < PL_bufptr && isSPACE(*t))
 534                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 535                         "\t(Do you need to predeclare %.*s?)\n",
 536                     (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
 537         }
 538         else {
 539             assert(s >= oldbp);
 540             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 541                     "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
 542         }
 543     }
 544     PL_bufptr = oldbp;
 545 }
 546
 547 /*
 548  * S_missingterm
 549  * Complain about missing quote/regexp/heredoc terminator.
 550  * If it's called with NULL then it cauterizes the line buffer.
 551  * If we're in a delimited string and the delimiter is a control
 552  * character, it's reformatted into a two-char sequence like ^C.
 553  * This is fatal.
 554  */
 555
 556 STATIC void
 557 S_missingterm(pTHX_ char *s)
 558 {
 559     dVAR;
 560     char tmpbuf[3];
 561     char q;
 562     if (s) {
 563         char * const nl = strrchr(s,'\n');
 564         if (nl)
 565             *nl = '\0';
 566     }
 567     else if (isCNTRL(PL_multi_close)) {
 568         *tmpbuf = '^';
 569         tmpbuf[1] = (char)toCTRL(PL_multi_close);
 570         tmpbuf[2] = '\0';
 571         s = tmpbuf;
 572     }
 573     else {
 574         *tmpbuf = (char)PL_multi_close;
 575         tmpbuf[1] = '\0';
 576         s = tmpbuf;
 577     }
 578     q = strchr(s,'"') ? '\'' : '"';
 579     Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
 580 }
 581
 582 #define FEATURE_IS_ENABLED(name)                                        \
 583         ((0 != (PL_hints & HINT_LOCALIZE_HH))                           \
 584             && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
 585 /* The longest string we pass in.  */
 586 #define MAX_FEATURE_LEN (sizeof("switch")-1)
 587
 588 /*
 589  * S_feature_is_enabled
 590  * Check whether the named feature is enabled.
 591  */
 592 STATIC bool
 593 S_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
 594 {
 595     dVAR;
 596     HV * const hinthv = GvHV(PL_hintgv);
 597     char he_name[8 + MAX_FEATURE_LEN] = "feature_";
 598
 599     PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
 600
 601     assert(namelen <= MAX_FEATURE_LEN);
 602     memcpy(&he_name[8], name, namelen);
 603
 604     return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
 605 }
 606
 607 /*
 608  * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
 609  * utf16-to-utf8-reversed.
 610  */
 611
 612 #ifdef PERL_CR_FILTER
 613 static void
 614 strip_return(SV *sv)
 615 {
 616     register const char *s = SvPVX_const(sv);
 617     register const char * const e = s + SvCUR(sv);
 618
 619     PERL_ARGS_ASSERT_STRIP_RETURN;
 620
 621     /* outer loop optimized to do nothing if there are no CR-LFs */
 622     while (s < e) {
 623         if (*s++ == '\r' && *s == '\n') {
 624             /* hit a CR-LF, need to copy the rest */
 625             register char *d = s - 1;
 626             *d++ = *s++;
 627             while (s < e) {
 628                 if (*s == '\r' && s[1] == '\n')
 629                     s++;
 630                 *d++ = *s++;
 631             }
 632             SvCUR(sv) -= s - d;
 633             return;
 634         }
 635     }
 636 }
 637
 638 STATIC I32
 639 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
 640 {
 641     const I32 count = FILTER_READ(idx+1, sv, maxlen);
 642     if (count > 0 && !maxlen)
 643         strip_return(sv);
 644     return count;
 645 }
 646 #endif
 647
 648
 649
 650 /*
 651  * Perl_lex_start
 652  *
 653  * Create a parser object and initialise its parser and lexer fields
 654  *
 655  * rsfp       is the opened file handle to read from (if any),
 656  *
 657  * line       holds any initial content already read from the file (or in
 658  *            the case of no file, such as an eval, the whole contents);
 659  *
 660  * new_filter indicates that this is a new file and it shouldn't inherit
 661  *            the filters from the current parser (ie require).
 662  */
 663
 664 void
 665 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, bool new_filter)
 666 {
 667     dVAR;
 668     const char *s = NULL;
 669     STRLEN len;
 670     yy_parser *parser, *oparser;
 671
 672     /* create and initialise a parser */
 673
 674     Newxz(parser, 1, yy_parser);
 675     parser->old_parser = oparser = PL_parser;
 676     PL_parser = parser;
 677
 678     Newx(parser->stack, YYINITDEPTH, yy_stack_frame);
 679     parser->ps = parser->stack;
 680     parser->stack_size = YYINITDEPTH;
 681
 682     parser->stack->state = 0;
 683     parser->yyerrstatus = 0;
 684     parser->yychar = YYEMPTY;           /* Cause a token to be read.  */
 685
 686     /* on scope exit, free this parser and restore any outer one */
 687     SAVEPARSER(parser);
 688     parser->saved_curcop = PL_curcop;
 689
 690     /* initialise lexer state */
 691
 692 #ifdef PERL_MAD
 693     parser->curforce = -1;
 694 #else
 695     parser->nexttoke = 0;
 696 #endif
 697     parser->error_count = oparser ? oparser->error_count : 0;
 698     parser->copline = NOLINE;
 699     parser->lex_state = LEX_NORMAL;
 700     parser->expect = XSTATE;
 701     parser->rsfp = rsfp;
 702     parser->rsfp_filters = (new_filter || !oparser) ? newAV()
 703                 : MUTABLE_AV(SvREFCNT_inc(oparser->rsfp_filters));
 704
 705     Newx(parser->lex_brackstack, 120, char);
 706     Newx(parser->lex_casestack, 12, char);
 707     *parser->lex_casestack = '\0';
 708
 709     if (line) {
 710         s = SvPV_const(line, len);
 711     } else {
 712         len = 0;
 713     }
 714
 715     if (!len) {
 716         parser->linestr = newSVpvs("\n;");
 717     } else if (SvREADONLY(line) || s[len-1] != ';') {
 718         parser->linestr = newSVsv(line);
 719         if (s[len-1] != ';')
 720             sv_catpvs(parser->linestr, "\n;");
 721     } else {
 722         SvTEMP_off(line);
 723         SvREFCNT_inc_simple_void_NN(line);
 724         parser->linestr = line;
 725     }
 726     parser->oldoldbufptr =
 727         parser->oldbufptr =
 728         parser->bufptr =
 729         parser->linestart = SvPVX(parser->linestr);
 730     parser->bufend = parser->bufptr + SvCUR(parser->linestr);
 731     parser->last_lop = parser->last_uni = NULL;
 732 }
 733
 734
 735 /* delete a parser object */
 736
 737 void
 738 Perl_parser_free(pTHX_  const yy_parser *parser)
 739 {
 740     PERL_ARGS_ASSERT_PARSER_FREE;
 741
 742     PL_curcop = parser->saved_curcop;
 743     SvREFCNT_dec(parser->linestr);
 744
 745     if (parser->rsfp == PerlIO_stdin())
 746         PerlIO_clearerr(parser->rsfp);
 747     else if (parser->rsfp && (!parser->old_parser ||
 748                 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
 749         PerlIO_close(parser->rsfp);
 750     SvREFCNT_dec(parser->rsfp_filters);
 751
 752     Safefree(parser->stack);
 753     Safefree(parser->lex_brackstack);
 754     Safefree(parser->lex_casestack);
 755     PL_parser = parser->old_parser;
 756     Safefree(parser);
 757 }
 758
 759
 760 /*
 761  * Perl_lex_end
 762  * Finalizer for lexing operations.  Must be called when the parser is
 763  * done with the lexer.
 764  */
 765
 766 void
 767 Perl_lex_end(pTHX)
 768 {
 769     dVAR;
 770     PL_doextract = FALSE;
 771 }
 772
 773 /*
 774 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
 775
 776 Buffer scalar containing the chunk currently under consideration of the
 777 text currently being lexed.  This is always a plain string scalar (for
 778 which C<SvPOK> is true).  It is not intended to be used as a scalar by
 779 normal scalar means; instead refer to the buffer directly by the pointer
 780 variables described below.
 781
 782 The lexer maintains various C<char*> pointers to things in the
 783 C<PL_parser-E<gt>linestr> buffer.  If C<PL_parser-E<gt>linestr> is ever
 784 reallocated, all of these pointers must be updated.  Don't attempt to
 785 do this manually, but rather use L</lex_grow_linestr> if you need to
 786 reallocate the buffer.
 787
 788 The content of the text chunk in the buffer is commonly exactly one
 789 complete line of input, up to and including a newline terminator,
 790 but there are situations where it is otherwise.  The octets of the
 791 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
 792 The function L</lex_bufutf8> tells you which.  Do not use the C<SvUTF8>
 793 flag on this scalar, which may disagree with it.
 794
 795 For direct examination of the buffer, the variable
 796 L</PL_parser-E<gt>bufend> points to the end of the buffer.  The current
 797 lexing position is pointed to by L</PL_parser-E<gt>bufptr>.  Direct use
 798 of these pointers is usually preferable to examination of the scalar
 799 through normal scalar means.
 800
 801 =for apidoc AmxU|char *|PL_parser-E<gt>bufend
 802
 803 Direct pointer to the end of the chunk of text currently being lexed, the
 804 end of the lexer buffer.  This is equal to C<SvPVX(PL_parser-E<gt>linestr)
 805 + SvCUR(PL_parser-E<gt>linestr)>.  A NUL character (zero octet) is
 806 always located at the end of the buffer, and does not count as part of
 807 the buffer's contents.
 808
 809 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr
 810
 811 Points to the current position of lexing inside the lexer buffer.
 812 Characters around this point may be freely examined, within
 813 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
 814 L</PL_parser-E<gt>bufend>.  The octets of the buffer may be intended to be
 815 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
 816
 817 Lexing code (whether in the Perl core or not) moves this pointer past
 818 the characters that it consumes.  It is also expected to perform some
 819 bookkeeping whenever a newline character is consumed.  This movement
 820 can be more conveniently performed by the function L</lex_read_to>,
 821 which handles newlines appropriately.
 822
 823 Interpretation of the buffer's octets can be abstracted out by
 824 using the slightly higher-level functions L</lex_peek_unichar> and
 825 L</lex_read_unichar>.
 826
 827 =for apidoc AmxU|char *|PL_parser-E<gt>linestart
 828
 829 Points to the start of the current line inside the lexer buffer.
 830 This is useful for indicating at which column an error occurred, and
 831 not much else.  This must be updated by any lexing code that consumes
 832 a newline; the function L</lex_read_to> handles this detail.
 833
 834 =cut
 835 */
 836
 837 /*
 838 =for apidoc Amx|bool|lex_bufutf8
 839
 840 Indicates whether the octets in the lexer buffer
 841 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
 842 of Unicode characters.  If not, they should be interpreted as Latin-1
 843 characters.  This is analogous to the C<SvUTF8> flag for scalars.
 844
 845 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
 846 contains valid UTF-8.  Lexing code must be robust in the face of invalid
 847 encoding.
 848
 849 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
 850 is significant, but not the whole story regarding the input character
 851 encoding.  Normally, when a file is being read, the scalar contains octets
 852 and its C<SvUTF8> flag is off, but the octets should be interpreted as
 853 UTF-8 if the C<use utf8> pragma is in effect.  During a string eval,
 854 however, the scalar may have the C<SvUTF8> flag on, and in this case its
 855 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
 856 is in effect.  This logic may change in the future; use this function
 857 instead of implementing the logic yourself.
 858
 859 =cut
 860 */
 861
 862 bool
 863 Perl_lex_bufutf8(pTHX)
 864 {
 865     return UTF;
 866 }
 867
 868 /*
 869 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
 870
 871 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
 872 at least I<len> octets (including terminating NUL).  Returns a
 873 pointer to the reallocated buffer.  This is necessary before making
 874 any direct modification of the buffer that would increase its length.
 875 L</lex_stuff_pvn> provides a more convenient way to insert text into
 876 the buffer.
 877
 878 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
 879 this function updates all of the lexer's variables that point directly
 880 into the buffer.
 881
 882 =cut
 883 */
 884
 885 char *
 886 Perl_lex_grow_linestr(pTHX_ STRLEN len)
 887 {
 888     SV *linestr;
 889     char *buf;
 890     STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
 891     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
 892     linestr = PL_parser->linestr;
 893     buf = SvPVX(linestr);
 894     if (len <= SvLEN(linestr))
 895         return buf;
 896     bufend_pos = PL_parser->bufend - buf;
 897     bufptr_pos = PL_parser->bufptr - buf;
 898     oldbufptr_pos = PL_parser->oldbufptr - buf;
 899     oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
 900     linestart_pos = PL_parser->linestart - buf;
 901     last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
 902     last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
 903     buf = sv_grow(linestr, len);
 904     PL_parser->bufend = buf + bufend_pos;
 905     PL_parser->bufptr = buf + bufptr_pos;
 906     PL_parser->oldbufptr = buf + oldbufptr_pos;
 907     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
 908     PL_parser->linestart = buf + linestart_pos;
 909     if (PL_parser->last_uni)
 910         PL_parser->last_uni = buf + last_uni_pos;
 911     if (PL_parser->last_lop)
 912         PL_parser->last_lop = buf + last_lop_pos;
 913     return buf;
 914 }
 915
 916 /*
 917 =for apidoc Amx|void|lex_stuff_pvn|char *pv|STRLEN len|U32 flags
 918
 919 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
 920 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
 921 reallocating the buffer if necessary.  This means that lexing code that
 922 runs later will see the characters as if they had appeared in the input.
 923 It is not recommended to do this as part of normal parsing, and most
 924 uses of this facility run the risk of the inserted characters being
 925 interpreted in an unintended manner.
 926
 927 The string to be inserted is represented by I<len> octets starting
 928 at I<pv>.  These octets are interpreted as either UTF-8 or Latin-1,
 929 according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
 930 The characters are recoded for the lexer buffer, according to how the
 931 buffer is currently being interpreted (L</lex_bufutf8>).  If a string
 932 to be interpreted is available as a Perl scalar, the L</lex_stuff_sv>
 933 function is more convenient.
 934
 935 =cut
 936 */
 937
 938 void
 939 Perl_lex_stuff_pvn(pTHX_ char *pv, STRLEN len, U32 flags)
 940 {
 941     char *bufptr;
 942     PERL_ARGS_ASSERT_LEX_STUFF_PVN;
 943     if (flags & ~(LEX_STUFF_UTF8))
 944         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
 945     if (UTF) {
 946         if (flags & LEX_STUFF_UTF8) {
 947             goto plain_copy;
 948         } else {
 949             STRLEN highhalf = 0;
 950             char *p, *e = pv+len;
 951             for (p = pv; p != e; p++)
 952                 highhalf += !!(((U8)*p) & 0x80);
 953             if (!highhalf)
 954                 goto plain_copy;
 955             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
 956             bufptr = PL_parser->bufptr;
 957             Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
 958             PL_parser->bufend += len+highhalf;
 959             for (p = pv; p != e; p++) {
 960                 U8 c = (U8)*p;
 961                 if (c & 0x80) {
 962                     *bufptr++ = (char)(0xc0 | (c >> 6));
 963                     *bufptr++ = (char)(0x80 | (c & 0x3f));
 964                 } else {
 965                     *bufptr++ = (char)c;
 966                 }
 967             }
 968         }
 969     } else {
 970         if (flags & LEX_STUFF_UTF8) {
 971             STRLEN highhalf = 0;
 972             char *p, *e = pv+len;
 973             for (p = pv; p != e; p++) {
 974                 U8 c = (U8)*p;
 975                 if (c >= 0xc4) {
 976                     Perl_croak(aTHX_ "Lexing code attempted to stuff "
 977                                 "non-Latin-1 character into Latin-1 input");
 978                 } else if (c >= 0xc2 && p+1 != e &&
 979                             (((U8)p[1]) & 0xc0) == 0x80) {
 980                     p++;
 981                     highhalf++;
 982                 } else if (c >= 0x80) {
 983                     /* malformed UTF-8 */
 984                     ENTER;
 985                     SAVESPTR(PL_warnhook);
 986                     PL_warnhook = PERL_WARNHOOK_FATAL;
 987                     utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
 988                     LEAVE;
 989                 }
 990             }
 991             if (!highhalf)
 992                 goto plain_copy;
 993             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
 994             bufptr = PL_parser->bufptr;
 995             Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
 996             PL_parser->bufend += len-highhalf;
 997             for (p = pv; p != e; p++) {
 998                 U8 c = (U8)*p;
 999                 if (c & 0x80) {
1000                     *bufptr++ = (char)(((c & 0x3) << 6) | (p[1] & 0x3f));
1001                     p++;
1002                 } else {
1003                     *bufptr++ = (char)c;
1004                 }
1005             }
1006         } else {
1007             plain_copy:
1008             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1009             bufptr = PL_parser->bufptr;
1010             Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1011             PL_parser->bufend += len;
1012             Copy(pv, bufptr, len, char);
1013         }
1014     }
1015 }
1016
1017 /*
1018 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1019
1020 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1021 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1022 reallocating the buffer if necessary.  This means that lexing code that
1023 runs later will see the characters as if they had appeared in the input.
1024 It is not recommended to do this as part of normal parsing, and most
1025 uses of this facility run the risk of the inserted characters being
1026 interpreted in an unintended manner.
1027
1028 The string to be inserted is the string value of I<sv>.  The characters
1029 are recoded for the lexer buffer, according to how the buffer is currently
1030 being interpreted (L</lex_bufutf8>).  If a string to be interpreted is
1031 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1032 need to construct a scalar.
1033
1034 =cut
1035 */
1036
1037 void
1038 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1039 {
1040     char *pv;
1041     STRLEN len;
1042     PERL_ARGS_ASSERT_LEX_STUFF_SV;
1043     if (flags)
1044         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1045     pv = SvPV(sv, len);
1046     lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1047 }
1048
1049 /*
1050 =for apidoc Amx|void|lex_unstuff|char *ptr
1051
1052 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1053 I<ptr>.  Text following I<ptr> will be moved, and the buffer shortened.
1054 This hides the discarded text from any lexing code that runs later,
1055 as if the text had never appeared.
1056
1057 This is not the normal way to consume lexed text.  For that, use
1058 L</lex_read_to>.
1059
1060 =cut
1061 */
1062
1063 void
1064 Perl_lex_unstuff(pTHX_ char *ptr)
1065 {
1066     char *buf, *bufend;
1067     STRLEN unstuff_len;
1068     PERL_ARGS_ASSERT_LEX_UNSTUFF;
1069     buf = PL_parser->bufptr;
1070     if (ptr < buf)
1071         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1072     if (ptr == buf)
1073         return;
1074     bufend = PL_parser->bufend;
1075     if (ptr > bufend)
1076         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1077     unstuff_len = ptr - buf;
1078     Move(ptr, buf, bufend+1-ptr, char);
1079     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1080     PL_parser->bufend = bufend - unstuff_len;
1081 }
1082
1083 /*
1084 =for apidoc Amx|void|lex_read_to|char *ptr
1085
1086 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1087 to I<ptr>.  This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1088 performing the correct bookkeeping whenever a newline character is passed.
1089 This is the normal way to consume lexed text.
1090
1091 Interpretation of the buffer's octets can be abstracted out by
1092 using the slightly higher-level functions L</lex_peek_unichar> and
1093 L</lex_read_unichar>.
1094
1095 =cut
1096 */
1097
1098 void
1099 Perl_lex_read_to(pTHX_ char *ptr)
1100 {
1101     char *s;
1102     PERL_ARGS_ASSERT_LEX_READ_TO;
1103     s = PL_parser->bufptr;
1104     if (ptr < s || ptr > PL_parser->bufend)
1105         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1106     for (; s != ptr; s++)
1107         if (*s == '\n') {
1108             CopLINE_inc(PL_curcop);
1109             PL_parser->linestart = s+1;
1110         }
1111     PL_parser->bufptr = ptr;
1112 }
1113
1114 /*
1115 =for apidoc Amx|void|lex_discard_to|char *ptr
1116
1117 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1118 up to I<ptr>.  The remaining content of the buffer will be moved, and
1119 all pointers into the buffer updated appropriately.  I<ptr> must not
1120 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1121 it is not permitted to discard text that has yet to be lexed.
1122
1123 Normally it is not necessarily to do this directly, because it suffices to
1124 use the implicit discarding behaviour of L</lex_next_chunk> and things
1125 based on it.  However, if a token stretches across multiple lines,
1126 and the lexing code has kept multiple lines of text in the buffer fof
1127 that purpose, then after completion of the token it would be wise to
1128 explicitly discard the now-unneeded earlier lines, to avoid future
1129 multi-line tokens growing the buffer without bound.
1130
1131 =cut
1132 */
1133
1134 void
1135 Perl_lex_discard_to(pTHX_ char *ptr)
1136 {
1137     char *buf;
1138     STRLEN discard_len;
1139     PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1140     buf = SvPVX(PL_parser->linestr);
1141     if (ptr < buf)
1142         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1143     if (ptr == buf)
1144         return;
1145     if (ptr > PL_parser->bufptr)
1146         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1147     discard_len = ptr - buf;
1148     if (PL_parser->oldbufptr < ptr)
1149         PL_parser->oldbufptr = ptr;
1150     if (PL_parser->oldoldbufptr < ptr)
1151         PL_parser->oldoldbufptr = ptr;
1152     if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1153         PL_parser->last_uni = NULL;
1154     if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1155         PL_parser->last_lop = NULL;
1156     Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1157     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1158     PL_parser->bufend -= discard_len;
1159     PL_parser->bufptr -= discard_len;
1160     PL_parser->oldbufptr -= discard_len;
1161     PL_parser->oldoldbufptr -= discard_len;
1162     if (PL_parser->last_uni)
1163         PL_parser->last_uni -= discard_len;
1164     if (PL_parser->last_lop)
1165         PL_parser->last_lop -= discard_len;
1166 }
1167
1168 /*
1169 =for apidoc Amx|bool|lex_next_chunk|U32 flags
1170
1171 Reads in the next chunk of text to be lexed, appending it to
1172 L</PL_parser-E<gt>linestr>.  This should be called when lexing code has
1173 looked to the end of the current chunk and wants to know more.  It is
1174 usual, but not necessary, for lexing to have consumed the entirety of
1175 the current chunk at this time.
1176
1177 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1178 chunk (i.e., the current chunk has been entirely consumed), normally the
1179 current chunk will be discarded at the same time that the new chunk is
1180 read in.  If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1181 will not be discarded.  If the current chunk has not been entirely
1182 consumed, then it will not be discarded regardless of the flag.
1183
1184 Returns true if some new text was added to the buffer, or false if the
1185 buffer has reached the end of the input text.
1186
1187 =cut
1188 */
1189
1190 #define LEX_FAKE_EOF 0x80000000
1191
1192 bool
1193 Perl_lex_next_chunk(pTHX_ U32 flags)
1194 {
1195     SV *linestr;
1196     char *buf;
1197     STRLEN old_bufend_pos, new_bufend_pos;
1198     STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1199     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1200     bool got_some_for_debugger = 0;
1201     bool got_some;
1202     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF))
1203         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1204     linestr = PL_parser->linestr;
1205     buf = SvPVX(linestr);
1206     if (!(flags & LEX_KEEP_PREVIOUS) &&
1207             PL_parser->bufptr == PL_parser->bufend) {
1208         old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1209         linestart_pos = 0;
1210         if (PL_parser->last_uni != PL_parser->bufend)
1211             PL_parser->last_uni = NULL;
1212         if (PL_parser->last_lop != PL_parser->bufend)
1213             PL_parser->last_lop = NULL;
1214         last_uni_pos = last_lop_pos = 0;
1215         *buf = 0;
1216         SvCUR(linestr) = 0;
1217     } else {
1218         old_bufend_pos = PL_parser->bufend - buf;
1219         bufptr_pos = PL_parser->bufptr - buf;
1220         oldbufptr_pos = PL_parser->oldbufptr - buf;
1221         oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1222         linestart_pos = PL_parser->linestart - buf;
1223         last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1224         last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1225     }
1226     if (flags & LEX_FAKE_EOF) {
1227         goto eof;
1228     } else if (!PL_parser->rsfp) {
1229         got_some = 0;
1230     } else if (filter_gets(linestr, old_bufend_pos)) {
1231         got_some = 1;
1232         got_some_for_debugger = 1;
1233     } else {
1234         if (!SvPOK(linestr))   /* can get undefined by filter_gets */
1235             sv_setpvs(linestr, "");
1236         eof:
1237         /* End of real input.  Close filehandle (unless it was STDIN),
1238          * then add implicit termination.
1239          */
1240         if ((PerlIO*)PL_parser->rsfp == PerlIO_stdin())
1241             PerlIO_clearerr(PL_parser->rsfp);
1242         else if (PL_parser->rsfp)
1243             (void)PerlIO_close(PL_parser->rsfp);
1244         PL_parser->rsfp = NULL;
1245         PL_doextract = FALSE;
1246 #ifdef PERL_MAD
1247         if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1248             PL_faketokens = 1;
1249 #endif
1250         if (!PL_in_eval && PL_minus_p) {
1251             sv_catpvs(linestr,
1252                 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1253             PL_minus_n = PL_minus_p = 0;
1254         } else if (!PL_in_eval && PL_minus_n) {
1255             sv_catpvs(linestr, /*{*/";}");
1256             PL_minus_n = 0;
1257         } else
1258             sv_catpvs(linestr, ";");
1259         got_some = 1;
1260     }
1261     buf = SvPVX(linestr);
1262     new_bufend_pos = SvCUR(linestr);
1263     PL_parser->bufend = buf + new_bufend_pos;
1264     PL_parser->bufptr = buf + bufptr_pos;
1265     PL_parser->oldbufptr = buf + oldbufptr_pos;
1266     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1267     PL_parser->linestart = buf + linestart_pos;
1268     if (PL_parser->last_uni)
1269         PL_parser->last_uni = buf + last_uni_pos;
1270     if (PL_parser->last_lop)
1271         PL_parser->last_lop = buf + last_lop_pos;
1272     if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
1273             PL_curstash != PL_debstash) {
1274         /* debugger active and we're not compiling the debugger code,
1275          * so store the line into the debugger's array of lines
1276          */
1277         update_debugger_info(NULL, buf+old_bufend_pos,
1278             new_bufend_pos-old_bufend_pos);
1279     }
1280     return got_some;
1281 }
1282
1283 /*
1284 =for apidoc Amx|I32|lex_peek_unichar|U32 flags
1285
1286 Looks ahead one (Unicode) character in the text currently being lexed.
1287 Returns the codepoint (unsigned integer value) of the next character,
1288 or -1 if lexing has reached the end of the input text.  To consume the
1289 peeked character, use L</lex_read_unichar>.
1290
1291 If the next character is in (or extends into) the next chunk of input
1292 text, the next chunk will be read in.  Normally the current chunk will be
1293 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1294 then the current chunk will not be discarded.
1295
1296 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1297 is encountered, an exception is generated.
1298
1299 =cut
1300 */
1301
1302 I32
1303 Perl_lex_peek_unichar(pTHX_ U32 flags)
1304 {
1305     char *s, *bufend;
1306     if (flags & ~(LEX_KEEP_PREVIOUS))
1307         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1308     s = PL_parser->bufptr;
1309     bufend = PL_parser->bufend;
1310     if (UTF) {
1311         U8 head;
1312         I32 unichar;
1313         STRLEN len, retlen;
1314         if (s == bufend) {
1315             if (!lex_next_chunk(flags))
1316                 return -1;
1317             s = PL_parser->bufptr;
1318             bufend = PL_parser->bufend;
1319         }
1320         head = (U8)*s;
1321         if (!(head & 0x80))
1322             return head;
1323         if (head & 0x40) {
1324             len = PL_utf8skip[head];
1325             while ((STRLEN)(bufend-s) < len) {
1326                 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1327                     break;
1328                 s = PL_parser->bufptr;
1329                 bufend = PL_parser->bufend;
1330             }
1331         }
1332         unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1333         if (retlen == (STRLEN)-1) {
1334             /* malformed UTF-8 */
1335             ENTER;
1336             SAVESPTR(PL_warnhook);
1337             PL_warnhook = PERL_WARNHOOK_FATAL;
1338             utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1339             LEAVE;
1340         }
1341         return unichar;
1342     } else {
1343         if (s == bufend) {
1344             if (!lex_next_chunk(flags))
1345                 return -1;
1346             s = PL_parser->bufptr;
1347         }
1348         return (U8)*s;
1349     }
1350 }
1351
1352 /*
1353 =for apidoc Amx|I32|lex_read_unichar|U32 flags
1354
1355 Reads the next (Unicode) character in the text currently being lexed.
1356 Returns the codepoint (unsigned integer value) of the character read,
1357 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1358 if lexing has reached the end of the input text.  To non-destructively
1359 examine the next character, use L</lex_peek_unichar> instead.
1360
1361 If the next character is in (or extends into) the next chunk of input
1362 text, the next chunk will be read in.  Normally the current chunk will be
1363 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1364 then the current chunk will not be discarded.
1365
1366 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1367 is encountered, an exception is generated.
1368
1369 =cut
1370 */
1371
1372 I32
1373 Perl_lex_read_unichar(pTHX_ U32 flags)
1374 {
1375     I32 c;
1376     if (flags & ~(LEX_KEEP_PREVIOUS))
1377         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1378     c = lex_peek_unichar(flags);
1379     if (c != -1) {
1380         if (c == '\n')
1381             CopLINE_inc(PL_curcop);
1382         PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1383     }
1384     return c;
1385 }
1386
1387 /*
1388 =for apidoc Amx|void|lex_read_space|U32 flags
1389
1390 Reads optional spaces, in Perl style, in the text currently being
1391 lexed.  The spaces may include ordinary whitespace characters and
1392 Perl-style comments.  C<#line> directives are processed if encountered.
1393 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1394 at a non-space character (or the end of the input text).
1395
1396 If spaces extend into the next chunk of input text, the next chunk will
1397 be read in.  Normally the current chunk will be discarded at the same
1398 time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1399 chunk will not be discarded.
1400
1401 =cut
1402 */
1403
1404 void
1405 Perl_lex_read_space(pTHX_ U32 flags)
1406 {
1407     char *s, *bufend;
1408     bool need_incline = 0;
1409     if (flags & ~(LEX_KEEP_PREVIOUS))
1410         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1411 #ifdef PERL_MAD
1412     if (PL_skipwhite) {
1413         sv_free(PL_skipwhite);
1414         PL_skipwhite = NULL;
1415     }
1416     if (PL_madskills)
1417         PL_skipwhite = newSVpvs("");
1418 #endif /* PERL_MAD */
1419     s = PL_parser->bufptr;
1420     bufend = PL_parser->bufend;
1421     while (1) {
1422         char c = *s;
1423         if (c == '#') {
1424             do {
1425                 c = *++s;
1426             } while (!(c == '\n' || (c == 0 && s == bufend)));
1427         } else if (c == '\n') {
1428             s++;
1429             PL_parser->linestart = s;
1430             if (s == bufend)
1431                 need_incline = 1;
1432             else
1433                 incline(s);
1434         } else if (isSPACE(c)) {
1435             s++;
1436         } else if (c == 0 && s == bufend) {
1437             bool got_more;
1438 #ifdef PERL_MAD
1439             if (PL_madskills)
1440                 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1441 #endif /* PERL_MAD */
1442             PL_parser->bufptr = s;
1443             CopLINE_inc(PL_curcop);
1444             got_more = lex_next_chunk(flags);
1445             CopLINE_dec(PL_curcop);
1446             s = PL_parser->bufptr;
1447             bufend = PL_parser->bufend;
1448             if (!got_more)
1449                 break;
1450             if (need_incline && PL_parser->rsfp) {
1451                 incline(s);
1452                 need_incline = 0;
1453             }
1454         } else {
1455             break;
1456         }
1457     }
1458 #ifdef PERL_MAD
1459     if (PL_madskills)
1460         sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1461 #endif /* PERL_MAD */
1462     PL_parser->bufptr = s;
1463 }
1464
1465 /*
1466  * S_incline
1467  * This subroutine has nothing to do with tilting, whether at windmills
1468  * or pinball tables.  Its name is short for "increment line".  It
1469  * increments the current line number in CopLINE(PL_curcop) and checks
1470  * to see whether the line starts with a comment of the form
1471  *    # line 500 "foo.pm"
1472  * If so, it sets the current line number and file to the values in the comment.
1473  */
1474
1475 STATIC void
1476 S_incline(pTHX_ const char *s)
1477 {
1478     dVAR;
1479     const char *t;
1480     const char *n;
1481     const char *e;
1482
1483     PERL_ARGS_ASSERT_INCLINE;
1484
1485     CopLINE_inc(PL_curcop);
1486     if (*s++ != '#')
1487         return;
1488     while (SPACE_OR_TAB(*s))
1489         s++;
1490     if (strnEQ(s, "line", 4))
1491         s += 4;
1492     else
1493         return;
1494     if (SPACE_OR_TAB(*s))
1495         s++;
1496     else
1497         return;
1498     while (SPACE_OR_TAB(*s))
1499         s++;
1500     if (!isDIGIT(*s))
1501         return;
1502
1503     n = s;
1504     while (isDIGIT(*s))
1505         s++;
1506     if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1507         return;
1508     while (SPACE_OR_TAB(*s))
1509         s++;
1510     if (*s == '"' && (t = strchr(s+1, '"'))) {
1511         s++;
1512         e = t + 1;
1513     }
1514     else {
1515         t = s;
1516         while (!isSPACE(*t))
1517             t++;
1518         e = t;
1519     }
1520     while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1521         e++;
1522     if (*e != '\n' && *e != '\0')
1523         return;         /* false alarm */
1524
1525     if (t - s > 0) {
1526         const STRLEN len = t - s;
1527 #ifndef USE_ITHREADS
1528         SV *const temp_sv = CopFILESV(PL_curcop);
1529         const char *cf;
1530         STRLEN tmplen;
1531
1532         if (temp_sv) {
1533             cf = SvPVX(temp_sv);
1534             tmplen = SvCUR(temp_sv);
1535         } else {
1536             cf = NULL;
1537             tmplen = 0;
1538         }
1539
1540         if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
1541             /* must copy *{"::_<(eval N)[oldfilename:L]"}
1542              * to *{"::_<newfilename"} */
1543             /* However, the long form of evals is only turned on by the
1544                debugger - usually they're "(eval %lu)" */
1545             char smallbuf[128];
1546             char *tmpbuf;
1547             GV **gvp;
1548             STRLEN tmplen2 = len;
1549             if (tmplen + 2 <= sizeof smallbuf)
1550                 tmpbuf = smallbuf;
1551             else
1552                 Newx(tmpbuf, tmplen + 2, char);
1553             tmpbuf[0] = '_';
1554             tmpbuf[1] = '<';
1555             memcpy(tmpbuf + 2, cf, tmplen);
1556             tmplen += 2;
1557             gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
1558             if (gvp) {
1559                 char *tmpbuf2;
1560                 GV *gv2;
1561
1562                 if (tmplen2 + 2 <= sizeof smallbuf)
1563                     tmpbuf2 = smallbuf;
1564                 else
1565                     Newx(tmpbuf2, tmplen2 + 2, char);
1566
1567                 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
1568                     /* Either they malloc'd it, or we malloc'd it,
1569                        so no prefix is present in ours.  */
1570                     tmpbuf2[0] = '_';
1571                     tmpbuf2[1] = '<';
1572                 }
1573
1574                 memcpy(tmpbuf2 + 2, s, tmplen2);
1575                 tmplen2 += 2;
1576
1577                 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1578                 if (!isGV(gv2)) {
1579                     gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1580                     /* adjust ${"::_<newfilename"} to store the new file name */
1581                     GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1582                     GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
1583                     GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
1584                 }
1585
1586                 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1587             }
1588             if (tmpbuf != smallbuf) Safefree(tmpbuf);
1589         }
1590 #endif
1591         CopFILE_free(PL_curcop);
1592         CopFILE_setn(PL_curcop, s, len);
1593     }
1594     CopLINE_set(PL_curcop, atoi(n)-1);
1595 }
1596
1597 #ifdef PERL_MAD
1598 /* skip space before PL_thistoken */
1599
1600 STATIC char *
1601 S_skipspace0(pTHX_ register char *s)
1602 {
1603     PERL_ARGS_ASSERT_SKIPSPACE0;
1604
1605     s = skipspace(s);
1606     if (!PL_madskills)
1607         return s;
1608     if (PL_skipwhite) {
1609         if (!PL_thiswhite)
1610             PL_thiswhite = newSVpvs("");
1611         sv_catsv(PL_thiswhite, PL_skipwhite);
1612         sv_free(PL_skipwhite);
1613         PL_skipwhite = 0;
1614     }
1615     PL_realtokenstart = s - SvPVX(PL_linestr);
1616     return s;
1617 }
1618
1619 /* skip space after PL_thistoken */
1620
1621 STATIC char *
1622 S_skipspace1(pTHX_ register char *s)
1623 {
1624     const char *start = s;
1625     I32 startoff = start - SvPVX(PL_linestr);
1626
1627     PERL_ARGS_ASSERT_SKIPSPACE1;
1628
1629     s = skipspace(s);
1630     if (!PL_madskills)
1631         return s;
1632     start = SvPVX(PL_linestr) + startoff;
1633     if (!PL_thistoken && PL_realtokenstart >= 0) {
1634         const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1635         PL_thistoken = newSVpvn(tstart, start - tstart);
1636     }
1637     PL_realtokenstart = -1;
1638     if (PL_skipwhite) {
1639         if (!PL_nextwhite)
1640             PL_nextwhite = newSVpvs("");
1641         sv_catsv(PL_nextwhite, PL_skipwhite);
1642         sv_free(PL_skipwhite);
1643         PL_skipwhite = 0;
1644     }
1645     return s;
1646 }
1647
1648 STATIC char *
1649 S_skipspace2(pTHX_ register char *s, SV **svp)
1650 {
1651     char *start;
1652     const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
1653     const I32 startoff = s - SvPVX(PL_linestr);
1654
1655     PERL_ARGS_ASSERT_SKIPSPACE2;
1656
1657     s = skipspace(s);
1658     PL_bufptr = SvPVX(PL_linestr) + bufptroff;
1659     if (!PL_madskills || !svp)
1660         return s;
1661     start = SvPVX(PL_linestr) + startoff;
1662     if (!PL_thistoken && PL_realtokenstart >= 0) {
1663         char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1664         PL_thistoken = newSVpvn(tstart, start - tstart);
1665         PL_realtokenstart = -1;
1666     }
1667     if (PL_skipwhite) {
1668         if (!*svp)
1669             *svp = newSVpvs("");
1670         sv_setsv(*svp, PL_skipwhite);
1671         sv_free(PL_skipwhite);
1672         PL_skipwhite = 0;
1673     }
1674
1675     return s;
1676 }
1677 #endif
1678
1679 STATIC void
1680 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1681 {
1682     AV *av = CopFILEAVx(PL_curcop);
1683     if (av) {
1684         SV * const sv = newSV_type(SVt_PVMG);
1685         if (orig_sv)
1686             sv_setsv(sv, orig_sv);
1687         else
1688             sv_setpvn(sv, buf, len);
1689         (void)SvIOK_on(sv);
1690         SvIV_set(sv, 0);
1691         av_store(av, (I32)CopLINE(PL_curcop), sv);
1692     }
1693 }
1694
1695 /*
1696  * S_skipspace
1697  * Called to gobble the appropriate amount and type of whitespace.
1698  * Skips comments as well.
1699  */
1700
1701 STATIC char *
1702 S_skipspace(pTHX_ register char *s)
1703 {
1704 #ifdef PERL_MAD
1705     char *start = s;
1706 #endif /* PERL_MAD */
1707     PERL_ARGS_ASSERT_SKIPSPACE;
1708 #ifdef PERL_MAD
1709     if (PL_skipwhite) {
1710         sv_free(PL_skipwhite);
1711         PL_skipwhite = NULL;
1712     }
1713 #endif /* PERL_MAD */
1714     if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1715         while (s < PL_bufend && SPACE_OR_TAB(*s))
1716             s++;
1717     } else if (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE) {
1718         while (isSPACE(*s) && *s != '\n')
1719             s++;
1720         if (*s == '#') {
1721             do {
1722                 s++;
1723             } while (s != PL_bufend && *s != '\n');
1724         }
1725         if (*s == '\n')
1726             s++;
1727     } else {
1728         STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1729         PL_bufptr = s;
1730         lex_read_space(LEX_KEEP_PREVIOUS);
1731         s = PL_bufptr;
1732         PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1733         if (PL_linestart > PL_bufptr)
1734             PL_bufptr = PL_linestart;
1735         return s;
1736     }
1737 #ifdef PERL_MAD
1738     if (PL_madskills)
1739         PL_skipwhite = newSVpvn(start, s-start);
1740 #endif /* PERL_MAD */
1741     return s;
1742 }
1743
1744 /*
1745  * S_check_uni
1746  * Check the unary operators to ensure there's no ambiguity in how they're
1747  * used.  An ambiguous piece of code would be:
1748  *     rand + 5
1749  * This doesn't mean rand() + 5.  Because rand() is a unary operator,
1750  * the +5 is its argument.
1751  */
1752
1753 STATIC void
1754 S_check_uni(pTHX)
1755 {
1756     dVAR;
1757     const char *s;
1758     const char *t;
1759
1760     if (PL_oldoldbufptr != PL_last_uni)
1761         return;
1762     while (isSPACE(*PL_last_uni))
1763         PL_last_uni++;
1764     s = PL_last_uni;
1765     while (isALNUM_lazy_if(s,UTF) || *s == '-')
1766         s++;
1767     if ((t = strchr(s, '(')) && t < PL_bufptr)
1768         return;
1769
1770     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1771                      "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1772                      (int)(s - PL_last_uni), PL_last_uni);
1773 }
1774
1775 /*
1776  * LOP : macro to build a list operator.  Its behaviour has been replaced
1777  * with a subroutine, S_lop() for which LOP is just another name.
1778  */
1779
1780 #define LOP(f,x) return lop(f,x,s)
1781
1782 /*
1783  * S_lop
1784  * Build a list operator (or something that might be one).  The rules:
1785  *  - if we have a next token, then it's a list operator [why?]
1786  *  - if the next thing is an opening paren, then it's a function
1787  *  - else it's a list operator
1788  */
1789
1790 STATIC I32
1791 S_lop(pTHX_ I32 f, int x, char *s)
1792 {
1793     dVAR;
1794
1795     PERL_ARGS_ASSERT_LOP;
1796
1797     pl_yylval.ival = f;
1798     CLINE;
1799     PL_expect = x;
1800     PL_bufptr = s;
1801     PL_last_lop = PL_oldbufptr;
1802     PL_last_lop_op = (OPCODE)f;
1803 #ifdef PERL_MAD
1804     if (PL_lasttoke)
1805         return REPORT(LSTOP);
1806 #else
1807     if (PL_nexttoke)
1808         return REPORT(LSTOP);
1809 #endif
1810     if (*s == '(')
1811         return REPORT(FUNC);
1812     s = PEEKSPACE(s);
1813     if (*s == '(')
1814         return REPORT(FUNC);
1815     else
1816         return REPORT(LSTOP);
1817 }
1818
1819 #ifdef PERL_MAD
1820  /*
1821  * S_start_force
1822  * Sets up for an eventual force_next().  start_force(0) basically does
1823  * an unshift, while start_force(-1) does a push.  yylex removes items
1824  * on the "pop" end.
1825  */
1826
1827 STATIC void
1828 S_start_force(pTHX_ int where)
1829 {
1830     int i;
1831
1832     if (where < 0)      /* so people can duplicate start_force(PL_curforce) */
1833         where = PL_lasttoke;
1834     assert(PL_curforce < 0 || PL_curforce == where);
1835     if (PL_curforce != where) {
1836         for (i = PL_lasttoke; i > where; --i) {
1837             PL_nexttoke[i] = PL_nexttoke[i-1];
1838         }
1839         PL_lasttoke++;
1840     }
1841     if (PL_curforce < 0)        /* in case of duplicate start_force() */
1842         Zero(&PL_nexttoke[where], 1, NEXTTOKE);
1843     PL_curforce = where;
1844     if (PL_nextwhite) {
1845         if (PL_madskills)
1846             curmad('^', newSVpvs(""));
1847         CURMAD('_', PL_nextwhite);
1848     }
1849 }
1850
1851 STATIC void
1852 S_curmad(pTHX_ char slot, SV *sv)
1853 {
1854     MADPROP **where;
1855
1856     if (!sv)
1857         return;
1858     if (PL_curforce < 0)
1859         where = &PL_thismad;
1860     else
1861         where = &PL_nexttoke[PL_curforce].next_mad;
1862
1863     if (PL_faketokens)
1864         sv_setpvs(sv, "");
1865     else {
1866         if (!IN_BYTES) {
1867             if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1868                 SvUTF8_on(sv);
1869             else if (PL_encoding) {
1870                 sv_recode_to_utf8(sv, PL_encoding);
1871             }
1872         }
1873     }
1874
1875     /* keep a slot open for the head of the list? */
1876     if (slot != '_' && *where && (*where)->mad_key == '^') {
1877         (*where)->mad_key = slot;
1878         sv_free(MUTABLE_SV(((*where)->mad_val)));
1879         (*where)->mad_val = (void*)sv;
1880     }
1881     else
1882         addmad(newMADsv(slot, sv), where, 0);
1883 }
1884 #else
1885 #  define start_force(where)    NOOP
1886 #  define curmad(slot, sv)      NOOP
1887 #endif
1888
1889 /*
1890  * S_force_next
1891  * When the lexer realizes it knows the next token (for instance,
1892  * it is reordering tokens for the parser) then it can call S_force_next
1893  * to know what token to return the next time the lexer is called.  Caller
1894  * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1895  * and possibly PL_expect to ensure the lexer handles the token correctly.
1896  */
1897
1898 STATIC void
1899 S_force_next(pTHX_ I32 type)
1900 {
1901     dVAR;
1902 #ifdef DEBUGGING
1903     if (DEBUG_T_TEST) {
1904         PerlIO_printf(Perl_debug_log, "### forced token:\n");
1905         tokereport(type, &NEXTVAL_NEXTTOKE);
1906     }
1907 #endif
1908 #ifdef PERL_MAD
1909     if (PL_curforce < 0)
1910         start_force(PL_lasttoke);
1911     PL_nexttoke[PL_curforce].next_type = type;
1912     if (PL_lex_state != LEX_KNOWNEXT)
1913         PL_lex_defer = PL_lex_state;
1914     PL_lex_state = LEX_KNOWNEXT;
1915     PL_lex_expect = PL_expect;
1916     PL_curforce = -1;
1917 #else
1918     PL_nexttype[PL_nexttoke] = type;
1919     PL_nexttoke++;
1920     if (PL_lex_state != LEX_KNOWNEXT) {
1921         PL_lex_defer = PL_lex_state;
1922         PL_lex_expect = PL_expect;
1923         PL_lex_state = LEX_KNOWNEXT;
1924     }
1925 #endif
1926 }
1927
1928 STATIC SV *
1929 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
1930 {
1931     dVAR;
1932     SV * const sv = newSVpvn_utf8(start, len,
1933                                   !IN_BYTES
1934                                   && UTF
1935                                   && !is_ascii_string((const U8*)start, len)
1936                                   && is_utf8_string((const U8*)start, len));
1937     return sv;
1938 }
1939
1940 /*
1941  * S_force_word
1942  * When the lexer knows the next thing is a word (for instance, it has
1943  * just seen -> and it knows that the next char is a word char, then
1944  * it calls S_force_word to stick the next word into the PL_nexttoke/val
1945  * lookahead.
1946  *
1947  * Arguments:
1948  *   char *start : buffer position (must be within PL_linestr)
1949  *   int token   : PL_next* will be this type of bare word (e.g., METHOD,WORD)
1950  *   int check_keyword : if true, Perl checks to make sure the word isn't
1951  *       a keyword (do this if the word is a label, e.g. goto FOO)
1952  *   int allow_pack : if true, : characters will also be allowed (require,
1953  *       use, etc. do this)
1954  *   int allow_initial_tick : used by the "sub" lexer only.
1955  */
1956
1957 STATIC char *
1958 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
1959 {
1960     dVAR;
1961     register char *s;
1962     STRLEN len;
1963
1964     PERL_ARGS_ASSERT_FORCE_WORD;
1965
1966     start = SKIPSPACE1(start);
1967     s = start;
1968     if (isIDFIRST_lazy_if(s,UTF) ||
1969         (allow_pack && *s == ':') ||
1970         (allow_initial_tick && *s == '\'') )
1971     {
1972         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
1973         if (check_keyword && keyword(PL_tokenbuf, len, 0))
1974             return start;
1975         start_force(PL_curforce);
1976         if (PL_madskills)
1977             curmad('X', newSVpvn(start,s-start));
1978         if (token == METHOD) {
1979             s = SKIPSPACE1(s);
1980             if (*s == '(')
1981                 PL_expect = XTERM;
1982             else {
1983                 PL_expect = XOPERATOR;
1984             }
1985         }
1986         if (PL_madskills)
1987             curmad('g', newSVpvs( "forced" ));
1988         NEXTVAL_NEXTTOKE.opval
1989             = (OP*)newSVOP(OP_CONST,0,
1990                            S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
1991         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
1992         force_next(token);
1993     }
1994     return s;
1995 }
1996
1997 /*
1998  * S_force_ident
1999  * Called when the lexer wants $foo *foo &foo etc, but the program
2000  * text only contains the "foo" portion.  The first argument is a pointer
2001  * to the "foo", and the second argument is the type symbol to prefix.
2002  * Forces the next token to be a "WORD".
2003  * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2004  */
2005
2006 STATIC void
2007 S_force_ident(pTHX_ register const char *s, int kind)
2008 {
2009     dVAR;
2010
2011     PERL_ARGS_ASSERT_FORCE_IDENT;
2012
2013     if (*s) {
2014         const STRLEN len = strlen(s);
2015         OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
2016         start_force(PL_curforce);
2017         NEXTVAL_NEXTTOKE.opval = o;
2018         force_next(WORD);
2019         if (kind) {
2020             o->op_private = OPpCONST_ENTERED;
2021             /* XXX see note in pp_entereval() for why we forgo typo
2022                warnings if the symbol must be introduced in an eval.
2023                GSAR 96-10-12 */
2024             gv_fetchpvn_flags(s, len,
2025                               PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2026                               : GV_ADD,
2027                               kind == '$' ? SVt_PV :
2028                               kind == '@' ? SVt_PVAV :
2029                               kind == '%' ? SVt_PVHV :
2030                               SVt_PVGV
2031                               );
2032         }
2033     }
2034 }
2035
2036 NV
2037 Perl_str_to_version(pTHX_ SV *sv)
2038 {
2039     NV retval = 0.0;
2040     NV nshift = 1.0;
2041     STRLEN len;
2042     const char *start = SvPV_const(sv,len);
2043     const char * const end = start + len;
2044     const bool utf = SvUTF8(sv) ? TRUE : FALSE;
2045
2046     PERL_ARGS_ASSERT_STR_TO_VERSION;
2047
2048     while (start < end) {
2049         STRLEN skip;
2050         UV n;
2051         if (utf)
2052             n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2053         else {
2054             n = *(U8*)start;
2055             skip = 1;
2056         }
2057         retval += ((NV)n)/nshift;
2058         start += skip;
2059         nshift *= 1000;
2060     }
2061     return retval;
2062 }
2063
2064 /*
2065  * S_force_version
2066  * Forces the next token to be a version number.
2067  * If the next token appears to be an invalid version number, (e.g. "v2b"),
2068  * and if "guessing" is TRUE, then no new token is created (and the caller
2069  * must use an alternative parsing method).
2070  */
2071
2072 STATIC char *
2073 S_force_version(pTHX_ char *s, int guessing)
2074 {
2075     dVAR;
2076     OP *version = NULL;
2077     char *d;
2078 #ifdef PERL_MAD
2079     I32 startoff = s - SvPVX(PL_linestr);
2080 #endif
2081
2082     PERL_ARGS_ASSERT_FORCE_VERSION;
2083
2084     s = SKIPSPACE1(s);
2085
2086     d = s;
2087     if (*d == 'v')
2088         d++;
2089     if (isDIGIT(*d)) {
2090         while (isDIGIT(*d) || *d == '_' || *d == '.')
2091             d++;
2092 #ifdef PERL_MAD
2093         if (PL_madskills) {
2094             start_force(PL_curforce);
2095             curmad('X', newSVpvn(s,d-s));
2096         }
2097 #endif
2098         if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
2099             SV *ver;
2100             s = scan_num(s, &pl_yylval);
2101             version = pl_yylval.opval;
2102             ver = cSVOPx(version)->op_sv;
2103             if (SvPOK(ver) && !SvNIOK(ver)) {
2104                 SvUPGRADE(ver, SVt_PVNV);
2105                 SvNV_set(ver, str_to_version(ver));
2106                 SvNOK_on(ver);          /* hint that it is a version */
2107             }
2108         }
2109         else if (guessing) {
2110 #ifdef PERL_MAD
2111             if (PL_madskills) {
2112                 sv_free(PL_nextwhite);  /* let next token collect whitespace */
2113                 PL_nextwhite = 0;
2114                 s = SvPVX(PL_linestr) + startoff;
2115             }
2116 #endif
2117             return s;
2118         }
2119     }
2120
2121 #ifdef PERL_MAD
2122     if (PL_madskills && !version) {
2123         sv_free(PL_nextwhite);  /* let next token collect whitespace */
2124         PL_nextwhite = 0;
2125         s = SvPVX(PL_linestr) + startoff;
2126     }
2127 #endif
2128     /* NOTE: The parser sees the package name and the VERSION swapped */
2129     start_force(PL_curforce);
2130     NEXTVAL_NEXTTOKE.opval = version;
2131     force_next(WORD);
2132
2133     return s;
2134 }
2135
2136 /*
2137  * S_tokeq
2138  * Tokenize a quoted string passed in as an SV.  It finds the next
2139  * chunk, up to end of string or a backslash.  It may make a new
2140  * SV containing that chunk (if HINT_NEW_STRING is on).  It also
2141  * turns \\ into \.
2142  */
2143
2144 STATIC SV *
2145 S_tokeq(pTHX_ SV *sv)
2146 {
2147     dVAR;
2148     register char *s;
2149     register char *send;
2150     register char *d;
2151     STRLEN len = 0;
2152     SV *pv = sv;
2153
2154     PERL_ARGS_ASSERT_TOKEQ;
2155
2156     if (!SvLEN(sv))
2157         goto finish;
2158
2159     s = SvPV_force(sv, len);
2160     if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
2161         goto finish;
2162     send = s + len;
2163     while (s < send && *s != '\\')
2164         s++;
2165     if (s == send)
2166         goto finish;
2167     d = s;
2168     if ( PL_hints & HINT_NEW_STRING ) {
2169         pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
2170     }
2171     while (s < send) {
2172         if (*s == '\\') {
2173             if (s + 1 < send && (s[1] == '\\'))
2174                 s++;            /* all that, just for this */
2175         }
2176         *d++ = *s++;
2177     }
2178     *d = '\0';
2179     SvCUR_set(sv, d - SvPVX_const(sv));
2180   finish:
2181     if ( PL_hints & HINT_NEW_STRING )
2182        return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2183     return sv;
2184 }
2185
2186 /*
2187  * Now come three functions related to double-quote context,
2188  * S_sublex_start, S_sublex_push, and S_sublex_done.  They're used when
2189  * converting things like "\u\Lgnat" into ucfirst(lc("gnat")).  They
2190  * interact with PL_lex_state, and create fake ( ... ) argument lists
2191  * to handle functions and concatenation.
2192  * They assume that whoever calls them will be setting up a fake
2193  * join call, because each subthing puts a ',' after it.  This lets
2194  *   "lower \luPpEr"
2195  * become
2196  *  join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
2197  *
2198  * (I'm not sure whether the spurious commas at the end of lcfirst's
2199  * arguments and join's arguments are created or not).
2200  */
2201
2202 /*
2203  * S_sublex_start
2204  * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2205  *
2206  * Pattern matching will set PL_lex_op to the pattern-matching op to
2207  * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2208  *
2209  * OP_CONST and OP_READLINE are easy--just make the new op and return.
2210  *
2211  * Everything else becomes a FUNC.
2212  *
2213  * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2214  * had an OP_CONST or OP_READLINE).  This just sets us up for a
2215  * call to S_sublex_push().
2216  */
2217
2218 STATIC I32
2219 S_sublex_start(pTHX)
2220 {
2221     dVAR;
2222     register const I32 op_type = pl_yylval.ival;
2223
2224     if (op_type == OP_NULL) {
2225         pl_yylval.opval = PL_lex_op;
2226         PL_lex_op = NULL;
2227         return THING;
2228     }
2229     if (op_type == OP_CONST || op_type == OP_READLINE) {
2230         SV *sv = tokeq(PL_lex_stuff);
2231
2232         if (SvTYPE(sv) == SVt_PVIV) {
2233             /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2234             STRLEN len;
2235             const char * const p = SvPV_const(sv, len);
2236             SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2237             SvREFCNT_dec(sv);
2238             sv = nsv;
2239         }
2240         pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
2241         PL_lex_stuff = NULL;
2242         /* Allow <FH> // "foo" */
2243         if (op_type == OP_READLINE)
2244             PL_expect = XTERMORDORDOR;
2245         return THING;
2246     }
2247     else if (op_type == OP_BACKTICK && PL_lex_op) {
2248         /* readpipe() vas overriden */
2249         cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
2250         pl_yylval.opval = PL_lex_op;
2251         PL_lex_op = NULL;
2252         PL_lex_stuff = NULL;
2253         return THING;
2254     }
2255
2256     PL_sublex_info.super_state = PL_lex_state;
2257     PL_sublex_info.sub_inwhat = (U16)op_type;
2258     PL_sublex_info.sub_op = PL_lex_op;
2259     PL_lex_state = LEX_INTERPPUSH;
2260
2261     PL_expect = XTERM;
2262     if (PL_lex_op) {
2263         pl_yylval.opval = PL_lex_op;
2264         PL_lex_op = NULL;
2265         return PMFUNC;
2266     }
2267     else
2268         return FUNC;
2269 }
2270
2271 /*
2272  * S_sublex_push
2273  * Create a new scope to save the lexing state.  The scope will be
2274  * ended in S_sublex_done.  Returns a '(', starting the function arguments
2275  * to the uc, lc, etc. found before.
2276  * Sets PL_lex_state to LEX_INTERPCONCAT.
2277  */
2278
2279 STATIC I32
2280 S_sublex_push(pTHX)
2281 {
2282     dVAR;
2283     ENTER;
2284
2285     PL_lex_state = PL_sublex_info.super_state;
2286     SAVEBOOL(PL_lex_dojoin);
2287     SAVEI32(PL_lex_brackets);
2288     SAVEI32(PL_lex_casemods);
2289     SAVEI32(PL_lex_starts);
2290     SAVEI8(PL_lex_state);
2291     SAVEVPTR(PL_lex_inpat);
2292     SAVEI16(PL_lex_inwhat);
2293     SAVECOPLINE(PL_curcop);
2294     SAVEPPTR(PL_bufptr);
2295     SAVEPPTR(PL_bufend);
2296     SAVEPPTR(PL_oldbufptr);
2297     SAVEPPTR(PL_oldoldbufptr);
2298     SAVEPPTR(PL_last_lop);
2299     SAVEPPTR(PL_last_uni);
2300     SAVEPPTR(PL_linestart);
2301     SAVESPTR(PL_linestr);
2302     SAVEGENERICPV(PL_lex_brackstack);
2303     SAVEGENERICPV(PL_lex_casestack);
2304
2305     PL_linestr = PL_lex_stuff;
2306     PL_lex_stuff = NULL;
2307
2308     PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2309         = SvPVX(PL_linestr);
2310     PL_bufend += SvCUR(PL_linestr);
2311     PL_last_lop = PL_last_uni = NULL;
2312     SAVEFREESV(PL_linestr);
2313
2314     PL_lex_dojoin = FALSE;
2315     PL_lex_brackets = 0;
2316     Newx(PL_lex_brackstack, 120, char);
2317     Newx(PL_lex_casestack, 12, char);
2318     PL_lex_casemods = 0;
2319     *PL_lex_casestack = '\0';
2320     PL_lex_starts = 0;
2321     PL_lex_state = LEX_INTERPCONCAT;
2322     CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2323
2324     PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2325     if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2326         PL_lex_inpat = PL_sublex_info.sub_op;
2327     else
2328         PL_lex_inpat = NULL;
2329
2330     return '(';
2331 }
2332
2333 /*
2334  * S_sublex_done
2335  * Restores lexer state after a S_sublex_push.
2336  */
2337
2338 STATIC I32
2339 S_sublex_done(pTHX)
2340 {
2341     dVAR;
2342     if (!PL_lex_starts++) {
2343         SV * const sv = newSVpvs("");
2344         if (SvUTF8(PL_linestr))
2345             SvUTF8_on(sv);
2346         PL_expect = XOPERATOR;
2347         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2348         return THING;
2349     }
2350
2351     if (PL_lex_casemods) {              /* oops, we've got some unbalanced parens */
2352         PL_lex_state = LEX_INTERPCASEMOD;
2353         return yylex();
2354     }
2355
2356     /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2357     if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2358         PL_linestr = PL_lex_repl;
2359         PL_lex_inpat = 0;
2360         PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2361         PL_bufend += SvCUR(PL_linestr);
2362         PL_last_lop = PL_last_uni = NULL;
2363         SAVEFREESV(PL_linestr);
2364         PL_lex_dojoin = FALSE;
2365         PL_lex_brackets = 0;
2366         PL_lex_casemods = 0;
2367         *PL_lex_casestack = '\0';
2368         PL_lex_starts = 0;
2369         if (SvEVALED(PL_lex_repl)) {
2370             PL_lex_state = LEX_INTERPNORMAL;
2371             PL_lex_starts++;
2372             /*  we don't clear PL_lex_repl here, so that we can check later
2373                 whether this is an evalled subst; that means we rely on the
2374                 logic to ensure sublex_done() is called again only via the
2375                 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2376         }
2377         else {
2378             PL_lex_state = LEX_INTERPCONCAT;
2379             PL_lex_repl = NULL;
2380         }
2381         return ',';
2382     }
2383     else {
2384 #ifdef PERL_MAD
2385         if (PL_madskills) {
2386             if (PL_thiswhite) {
2387                 if (!PL_endwhite)
2388                     PL_endwhite = newSVpvs("");
2389                 sv_catsv(PL_endwhite, PL_thiswhite);
2390                 PL_thiswhite = 0;
2391             }
2392             if (PL_thistoken)
2393                 sv_setpvs(PL_thistoken,"");
2394             else
2395                 PL_realtokenstart = -1;
2396         }
2397 #endif
2398         LEAVE;
2399         PL_bufend = SvPVX(PL_linestr);
2400         PL_bufend += SvCUR(PL_linestr);
2401         PL_expect = XOPERATOR;
2402         PL_sublex_info.sub_inwhat = 0;
2403         return ')';
2404     }
2405 }
2406
2407 /*
2408   scan_const
2409
2410   Extracts a pattern, double-quoted string, or transliteration.  This
2411   is terrifying code.
2412
2413   It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2414   processing a pattern (PL_lex_inpat is true), a transliteration
2415   (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2416
2417   Returns a pointer to the character scanned up to. If this is
2418   advanced from the start pointer supplied (i.e. if anything was
2419   successfully parsed), will leave an OP for the substring scanned
2420   in pl_yylval. Caller must intuit reason for not parsing further
2421   by looking at the next characters herself.
2422
2423   In patterns:
2424     backslashes:
2425       double-quoted style: \r and \n
2426       regexp special ones: \D \s
2427       constants: \x31
2428       backrefs: \1
2429       case and quoting: \U \Q \E
2430     stops on @ and $, but not for $ as tail anchor
2431
2432   In transliterations:
2433     characters are VERY literal, except for - not at the start or end
2434     of the string, which indicates a range. If the range is in bytes,
2435     scan_const expands the range to the full set of intermediate
2436     characters. If the range is in utf8, the hyphen is replaced with
2437     a certain range mark which will be handled by pmtrans() in op.c.
2438
2439   In double-quoted strings:
2440     backslashes:
2441       double-quoted style: \r and \n
2442       constants: \x31
2443       deprecated backrefs: \1 (in substitution replacements)
2444       case and quoting: \U \Q \E
2445     stops on @ and $
2446
2447   scan_const does *not* construct ops to handle interpolated strings.
2448   It stops processing as soon as it finds an embedded $ or @ variable
2449   and leaves it to the caller to work out what's going on.
2450
2451   embedded arrays (whether in pattern or not) could be:
2452       @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2453
2454   $ in double-quoted strings must be the symbol of an embedded scalar.
2455
2456   $ in pattern could be $foo or could be tail anchor.  Assumption:
2457   it's a tail anchor if $ is the last thing in the string, or if it's
2458   followed by one of "()| \r\n\t"
2459
2460   \1 (backreferences) are turned into $1
2461
2462   The structure of the code is
2463       while (there's a character to process) {
2464           handle transliteration ranges
2465           skip regexp comments /(?#comment)/ and codes /(?{code})/
2466           skip #-initiated comments in //x patterns
2467           check for embedded arrays
2468           check for embedded scalars
2469           if (backslash) {
2470               leave intact backslashes from leaveit (below)
2471               deprecate \1 in substitution replacements
2472               handle string-changing backslashes \l \U \Q \E, etc.
2473               switch (what was escaped) {
2474                   handle \- in a transliteration (becomes a literal -)
2475                   handle \132 (octal characters)
2476                   handle \x15 and \x{1234} (hex characters)
2477                   handle \N{name} (named characters)
2478                   handle \cV (control characters)
2479                   handle printf-style backslashes (\f, \r, \n, etc)
2480               } (end switch)
2481               continue
2482           } (end if backslash)
2483           handle regular character
2484     } (end while character to read)
2485
2486 */
2487
2488 STATIC char *
2489 S_scan_const(pTHX_ char *start)
2490 {
2491     dVAR;
2492     register char *send = PL_bufend;            /* end of the constant */
2493     SV *sv = newSV(send - start);               /* sv for the constant.  See
2494                                                    note below on sizing. */
2495     register char *s = start;                   /* start of the constant */
2496     register char *d = SvPVX(sv);               /* destination for copies */
2497     bool dorange = FALSE;                       /* are we in a translit range? */
2498     bool didrange = FALSE;                      /* did we just finish a range? */
2499     I32  has_utf8 = FALSE;                      /* Output constant is UTF8 */
2500     I32  this_utf8 = UTF;                       /* Is the source string assumed
2501                                                    to be UTF8?  But, this can
2502                                                    show as true when the source
2503                                                    isn't utf8, as for example
2504                                                    when it is entirely composed
2505                                                    of hex constants */
2506
2507     /* Note on sizing:  The scanned constant is placed into sv, which is
2508      * initialized by newSV() assuming one byte of output for every byte of
2509      * input.  This routine expects newSV() to allocate an extra byte for a
2510      * trailing NUL, which this routine will append if it gets to the end of
2511      * the input.  There may be more bytes of input than output (eg., \N{LATIN
2512      * CAPITAL LETTER A}), or more output than input if the constant ends up
2513      * recoded to utf8, but each time a construct is found that might increase
2514      * the needed size, SvGROW() is called.  Its size parameter each time is
2515      * based on the best guess estimate at the time, namely the length used so
2516      * far, plus the length the current construct will occupy, plus room for
2517      * the trailing NUL, plus one byte for every input byte still unscanned */
2518
2519     UV uv;
2520 #ifdef EBCDIC
2521     UV literal_endpoint = 0;
2522     bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
2523 #endif
2524
2525     PERL_ARGS_ASSERT_SCAN_CONST;
2526
2527     if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2528         /* If we are doing a trans and we know we want UTF8 set expectation */
2529         has_utf8   = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2530         this_utf8  = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2531     }
2532
2533
2534     while (s < send || dorange) {
2535         /* get transliterations out of the way (they're most literal) */
2536         if (PL_lex_inwhat == OP_TRANS) {
2537             /* expand a range A-Z to the full set of characters.  AIE! */
2538             if (dorange) {
2539                 I32 i;                          /* current expanded character */
2540                 I32 min;                        /* first character in range */
2541                 I32 max;                        /* last character in range */
2542
2543 #ifdef EBCDIC
2544                 UV uvmax = 0;
2545 #endif
2546
2547                 if (has_utf8
2548 #ifdef EBCDIC
2549                     && !native_range
2550 #endif
2551                     ) {
2552                     char * const c = (char*)utf8_hop((U8*)d, -1);
2553                     char *e = d++;
2554                     while (e-- > c)
2555                         *(e + 1) = *e;
2556                     *c = (char)UTF_TO_NATIVE(0xff);
2557                     /* mark the range as done, and continue */
2558                     dorange = FALSE;
2559                     didrange = TRUE;
2560                     continue;
2561                 }
2562
2563                 i = d - SvPVX_const(sv);                /* remember current offset */
2564 #ifdef EBCDIC
2565                 SvGROW(sv,
2566                        SvLEN(sv) + (has_utf8 ?
2567                                     (512 - UTF_CONTINUATION_MARK +
2568                                      UNISKIP(0x100))
2569                                     : 256));
2570                 /* How many two-byte within 0..255: 128 in UTF-8,
2571                  * 96 in UTF-8-mod. */
2572 #else
2573                 SvGROW(sv, SvLEN(sv) + 256);    /* never more than 256 chars in a range */
2574 #endif
2575                 d = SvPVX(sv) + i;              /* refresh d after realloc */
2576 #ifdef EBCDIC
2577                 if (has_utf8) {
2578                     int j;
2579                     for (j = 0; j <= 1; j++) {
2580                         char * const c = (char*)utf8_hop((U8*)d, -1);
2581                         const UV uv    = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2582                         if (j)
2583                             min = (U8)uv;
2584                         else if (uv < 256)
2585                             max = (U8)uv;
2586                         else {
2587                             max = (U8)0xff; /* only to \xff */
2588                             uvmax = uv; /* \x{100} to uvmax */
2589                         }
2590                         d = c; /* eat endpoint chars */
2591                      }
2592                 }
2593                else {
2594 #endif
2595                    d -= 2;              /* eat the first char and the - */
2596                    min = (U8)*d;        /* first char in range */
2597                    max = (U8)d[1];      /* last char in range  */
2598 #ifdef EBCDIC
2599                }
2600 #endif
2601
2602                 if (min > max) {
2603                     Perl_croak(aTHX_
2604                                "Invalid range \"%c-%c\" in transliteration operator",
2605                                (char)min, (char)max);
2606                 }
2607
2608 #ifdef EBCDIC
2609                 if (literal_endpoint == 2 &&
2610                     ((isLOWER(min) && isLOWER(max)) ||
2611                      (isUPPER(min) && isUPPER(max)))) {
2612                     if (isLOWER(min)) {
2613                         for (i = min; i <= max; i++)
2614                             if (isLOWER(i))
2615                                 *d++ = NATIVE_TO_NEED(has_utf8,i);
2616                     } else {
2617                         for (i = min; i <= max; i++)
2618                             if (isUPPER(i))
2619                                 *d++ = NATIVE_TO_NEED(has_utf8,i);
2620                     }
2621                 }
2622                 else
2623 #endif
2624                     for (i = min; i <= max; i++)
2625 #ifdef EBCDIC
2626                         if (has_utf8) {
2627                             const U8 ch = (U8)NATIVE_TO_UTF(i);
2628                             if (UNI_IS_INVARIANT(ch))
2629                                 *d++ = (U8)i;
2630                             else {
2631                                 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2632                                 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2633                             }
2634                         }
2635                         else
2636 #endif
2637                             *d++ = (char)i;
2638
2639 #ifdef EBCDIC
2640                 if (uvmax) {
2641                     d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2642                     if (uvmax > 0x101)
2643                         *d++ = (char)UTF_TO_NATIVE(0xff);
2644                     if (uvmax > 0x100)
2645                         d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2646                 }
2647 #endif
2648
2649                 /* mark the range as done, and continue */
2650                 dorange = FALSE;
2651                 didrange = TRUE;
2652 #ifdef EBCDIC
2653                 literal_endpoint = 0;
2654 #endif
2655                 continue;
2656             }
2657
2658             /* range begins (ignore - as first or last char) */
2659             else if (*s == '-' && s+1 < send  && s != start) {
2660                 if (didrange) {
2661                     Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
2662                 }
2663                 if (has_utf8
2664 #ifdef EBCDIC
2665                     && !native_range
2666 #endif
2667                     ) {
2668                     *d++ = (char)UTF_TO_NATIVE(0xff);   /* use illegal utf8 byte--see pmtrans */
2669                     s++;
2670                     continue;
2671                 }
2672                 dorange = TRUE;
2673                 s++;
2674             }
2675             else {
2676                 didrange = FALSE;
2677 #ifdef EBCDIC
2678                 literal_endpoint = 0;
2679                 native_range = TRUE;
2680 #endif
2681             }
2682         }
2683
2684         /* if we get here, we're not doing a transliteration */
2685
2686         /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2687            except for the last char, which will be done separately. */
2688         else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
2689             if (s[2] == '#') {
2690                 while (s+1 < send && *s != ')')
2691                     *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2692             }
2693             else if (s[2] == '{' /* This should match regcomp.c */
2694                     || (s[2] == '?' && s[3] == '{'))
2695             {
2696                 I32 count = 1;
2697                 char *regparse = s + (s[2] == '{' ? 3 : 4);
2698                 char c;
2699
2700                 while (count && (c = *regparse)) {
2701                     if (c == '\\' && regparse[1])
2702                         regparse++;
2703                     else if (c == '{')
2704                         count++;
2705                     else if (c == '}')
2706                         count--;
2707                     regparse++;
2708                 }
2709                 if (*regparse != ')')
2710                     regparse--;         /* Leave one char for continuation. */
2711                 while (s < regparse)
2712                     *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2713             }
2714         }
2715
2716         /* likewise skip #-initiated comments in //x patterns */
2717         else if (*s == '#' && PL_lex_inpat &&
2718           ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
2719             while (s+1 < send && *s != '\n')
2720                 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2721         }
2722
2723         /* check for embedded arrays
2724            (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
2725            */
2726         else if (*s == '@' && s[1]) {
2727             if (isALNUM_lazy_if(s+1,UTF))
2728                 break;
2729             if (strchr(":'{$", s[1]))
2730                 break;
2731             if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2732                 break; /* in regexp, neither @+ nor @- are interpolated */
2733         }
2734
2735         /* check for embedded scalars.  only stop if we're sure it's a
2736            variable.
2737         */
2738         else if (*s == '$') {
2739             if (!PL_lex_inpat)  /* not a regexp, so $ must be var */
2740                 break;
2741             if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
2742                 if (s[1] == '\\') {
2743                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2744                                    "Possible unintended interpolation of $\\ in regex");
2745                 }
2746                 break;          /* in regexp, $ might be tail anchor */
2747             }
2748         }
2749
2750         /* End of else if chain - OP_TRANS rejoin rest */
2751
2752         /* backslashes */
2753         if (*s == '\\' && s+1 < send) {
2754             s++;
2755
2756             /* deprecate \1 in strings and substitution replacements */
2757             if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
2758                 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
2759             {
2760                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
2761                 *--s = '$';
2762                 break;
2763             }
2764
2765             /* string-change backslash escapes */
2766             if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
2767                 --s;
2768                 break;
2769             }
2770             /* skip any other backslash escapes in a pattern */
2771             else if (PL_lex_inpat) {
2772                 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2773                 goto default_action;
2774             }
2775
2776             /* if we get here, it's either a quoted -, or a digit */
2777             switch (*s) {
2778
2779             /* quoted - in transliterations */
2780             case '-':
2781                 if (PL_lex_inwhat == OP_TRANS) {
2782                     *d++ = *s++;
2783                     continue;
2784                 }
2785                 /* FALL THROUGH */
2786             default:
2787                 {
2788                     if ((isALPHA(*s) || isDIGIT(*s)))
2789                         Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
2790                                        "Unrecognized escape \\%c passed through",
2791                                        *s);
2792                     /* default action is to copy the quoted character */
2793                     goto default_action;
2794                 }
2795
2796             /* eg. \132 indicates the octal constant 0x132 */
2797             case '0': case '1': case '2': case '3':
2798             case '4': case '5': case '6': case '7':
2799                 {
2800                     I32 flags = 0;
2801                     STRLEN len = 3;
2802                     uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
2803                     s += len;
2804                 }
2805                 goto NUM_ESCAPE_INSERT;
2806
2807             /* eg. \x24 indicates the hex constant 0x24 */
2808             case 'x':
2809                 ++s;
2810                 if (*s == '{') {
2811                     char* const e = strchr(s, '}');
2812                     I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2813                       PERL_SCAN_DISALLOW_PREFIX;
2814                     STRLEN len;
2815
2816                     ++s;
2817                     if (!e) {
2818                         yyerror("Missing right brace on \\x{}");
2819                         continue;
2820                     }
2821                     len = e - s;
2822                     uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
2823                     s = e + 1;
2824                 }
2825                 else {
2826                     {
2827                         STRLEN len = 2;
2828                         I32 flags = PERL_SCAN_DISALLOW_PREFIX;
2829                         uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
2830                         s += len;
2831                     }
2832                 }
2833
2834               NUM_ESCAPE_INSERT:
2835                 /* Insert oct, hex, or \N{U+...} escaped character.  There will
2836                  * always be enough room in sv since such escapes will be
2837                  * longer than any UTF-8 sequence they can end up as, except if
2838                  * they force us to recode the rest of the string into utf8 */
2839
2840                 /* Here uv is the ordinal of the next character being added in
2841                  * unicode (converted from native).  (It has to be done before
2842                  * here because \N is interpreted as unicode, and oct and hex
2843                  * as native.) */
2844                 if (!UNI_IS_INVARIANT(uv)) {
2845                     if (!has_utf8 && uv > 255) {
2846                         /* Might need to recode whatever we have accumulated so
2847                          * far if it contains any chars variant in utf8 or
2848                          * utf-ebcdic. */
2849
2850                         SvCUR_set(sv, d - SvPVX_const(sv));
2851                         SvPOK_on(sv);
2852                         *d = '\0';
2853                         /* See Note on sizing above.  */
2854                         sv_utf8_upgrade_flags_grow(sv,
2855                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
2856                                         UNISKIP(uv) + (STRLEN)(send - s) + 1);
2857                         d = SvPVX(sv) + SvCUR(sv);
2858                         has_utf8 = TRUE;
2859                     }
2860
2861                     if (has_utf8) {
2862                         d = (char*)uvuni_to_utf8((U8*)d, uv);
2863                         if (PL_lex_inwhat == OP_TRANS &&
2864                             PL_sublex_info.sub_op) {
2865                             PL_sublex_info.sub_op->op_private |=
2866                                 (PL_lex_repl ? OPpTRANS_FROM_UTF
2867                                              : OPpTRANS_TO_UTF);
2868                         }
2869 #ifdef EBCDIC
2870                         if (uv > 255 && !dorange)
2871                             native_range = FALSE;
2872 #endif
2873                     }
2874                     else {
2875                         *d++ = (char)uv;
2876                     }
2877                 }
2878                 else {
2879                     *d++ = (char) uv;
2880                 }
2881                 continue;
2882
2883             /* \N{LATIN SMALL LETTER A} is a named character, and so is
2884              * \N{U+0041} */
2885             case 'N':
2886                 ++s;
2887                 if (*s == '{') {
2888                     char* e = strchr(s, '}');
2889                     SV *res;
2890                     STRLEN len;
2891                     const char *str;
2892
2893                     if (!e) {
2894                         yyerror("Missing right brace on \\N{}");
2895                         e = s - 1;
2896                         goto cont_scan;
2897                     }
2898                     if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
2899                         /* \N{U+...} The ... is a unicode value even on EBCDIC
2900                          * machines */
2901                         I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2902                           PERL_SCAN_DISALLOW_PREFIX;
2903                         s += 3;
2904                         len = e - s;
2905                         uv = grok_hex(s, &len, &flags, NULL);
2906                         if ( e > s && len != (STRLEN)(e - s) ) {
2907                             uv = 0xFFFD;
2908                         }
2909                         s = e + 1;
2910                         goto NUM_ESCAPE_INSERT;
2911                     }
2912                     res = newSVpvn(s + 1, e - s - 1);
2913                     res = new_constant( NULL, 0, "charnames",
2914                                         res, NULL, s - 2, e - s + 3 );
2915                     if (has_utf8)
2916                         sv_utf8_upgrade(res);
2917                     str = SvPV_const(res,len);
2918 #ifdef EBCDIC_NEVER_MIND
2919                     /* charnames uses pack U and that has been
2920                      * recently changed to do the below uni->native
2921                      * mapping, so this would be redundant (and wrong,
2922                      * the code point would be doubly converted).
2923                      * But leave this in just in case the pack U change
2924                      * gets revoked, but the semantics is still
2925                      * desireable for charnames. --jhi */
2926                     {
2927                          UV uv = utf8_to_uvchr((const U8*)str, 0);
2928
2929                          if (uv < 0x100) {
2930                               U8 tmpbuf[UTF8_MAXBYTES+1], *d;
2931
2932                               d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
2933                               sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
2934                               str = SvPV_const(res, len);
2935                          }
2936                     }
2937 #endif
2938                     /* If destination is not in utf8 but this new character is,
2939                      * recode the dest to utf8 */
2940                     if (!has_utf8 && SvUTF8(res)) {
2941                         SvCUR_set(sv, d - SvPVX_const(sv));
2942                         SvPOK_on(sv);
2943                         *d = '\0';
2944                         /* See Note on sizing above.  */
2945                         sv_utf8_upgrade_flags_grow(sv,
2946                                             SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
2947                                             len + (STRLEN)(send - s) + 1);
2948                         d = SvPVX(sv) + SvCUR(sv);
2949                         has_utf8 = TRUE;
2950                     } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
2951
2952                         /* See Note on sizing above.  (NOTE: SvCUR() is not set
2953                          * correctly here). */
2954                         const STRLEN off = d - SvPVX_const(sv);
2955                         d = SvGROW(sv, off + len + (STRLEN)(send - s) + 1) + off;
2956                     }
2957 #ifdef EBCDIC
2958                     if (!dorange)
2959                         native_range = FALSE; /* \N{} is guessed to be Unicode */
2960 #endif
2961                     Copy(str, d, len, char);
2962                     d += len;
2963                     SvREFCNT_dec(res);
2964                   cont_scan:
2965                     s = e + 1;
2966                 }
2967                 else
2968                     yyerror("Missing braces on \\N{}");
2969                 continue;
2970
2971             /* \c is a control character */
2972             case 'c':
2973                 s++;
2974                 if (s < send) {
2975                     U8 c = *s++;
2976 #ifdef EBCDIC
2977                     if (isLOWER(c))
2978                         c = toUPPER(c);
2979 #endif
2980                     *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
2981                 }
2982                 else {
2983                     yyerror("Missing control char name in \\c");
2984                 }
2985                 continue;
2986
2987             /* printf-style backslashes, formfeeds, newlines, etc */
2988             case 'b':
2989                 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
2990                 break;
2991             case 'n':
2992                 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
2993                 break;
2994             case 'r':
2995                 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
2996                 break;
2997             case 'f':
2998                 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
2999                 break;
3000             case 't':
3001                 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
3002                 break;
3003             case 'e':
3004                 *d++ = ASCII_TO_NEED(has_utf8,'\033');
3005                 break;
3006             case 'a':
3007                 *d++ = ASCII_TO_NEED(has_utf8,'\007');
3008                 break;
3009             } /* end switch */
3010
3011             s++;
3012             continue;
3013         } /* end if (backslash) */
3014 #ifdef EBCDIC
3015         else
3016             literal_endpoint++;
3017 #endif
3018
3019     default_action:
3020         /* If we started with encoded form, or already know we want it,
3021            then encode the next character */
3022         if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
3023             STRLEN len  = 1;
3024
3025
3026             /* One might think that it is wasted effort in the case of the
3027              * source being utf8 (this_utf8 == TRUE) to take the next character
3028              * in the source, convert it to an unsigned value, and then convert
3029              * it back again.  But the source has not been validated here.  The
3030              * routine that does the conversion checks for errors like
3031              * malformed utf8 */
3032
3033             const UV nextuv   = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3034             const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
3035             if (!has_utf8) {
3036                 SvCUR_set(sv, d - SvPVX_const(sv));
3037                 SvPOK_on(sv);
3038                 *d = '\0';
3039                 /* See Note on sizing above.  */
3040                 sv_utf8_upgrade_flags_grow(sv,
3041                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3042                                         need + (STRLEN)(send - s) + 1);
3043                 d = SvPVX(sv) + SvCUR(sv);
3044                 has_utf8 = TRUE;
3045             } else if (need > len) {
3046                 /* encoded value larger than old, may need extra space (NOTE:
3047                  * SvCUR() is not set correctly here).   See Note on sizing
3048                  * above.  */
3049                 const STRLEN off = d - SvPVX_const(sv);
3050                 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
3051             }
3052             s += len;
3053
3054             d = (char*)uvchr_to_utf8((U8*)d, nextuv);
3055 #ifdef EBCDIC
3056             if (uv > 255 && !dorange)
3057                 native_range = FALSE;
3058 #endif
3059         }
3060         else {
3061             *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3062         }
3063     } /* while loop to process each character */
3064
3065     /* terminate the string and set up the sv */
3066     *d = '\0';
3067     SvCUR_set(sv, d - SvPVX_const(sv));
3068     if (SvCUR(sv) >= SvLEN(sv))
3069         Perl_croak(aTHX_ "panic: constant overflowed allocated space");
3070
3071     SvPOK_on(sv);
3072     if (PL_encoding && !has_utf8) {
3073         sv_recode_to_utf8(sv, PL_encoding);
3074         if (SvUTF8(sv))
3075             has_utf8 = TRUE;
3076     }
3077     if (has_utf8) {
3078         SvUTF8_on(sv);
3079         if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3080             PL_sublex_info.sub_op->op_private |=
3081                     (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3082         }
3083     }
3084
3085     /* shrink the sv if we allocated more than we used */
3086     if (SvCUR(sv) + 5 < SvLEN(sv)) {
3087         SvPV_shrink_to_cur(sv);
3088     }
3089
3090     /* return the substring (via pl_yylval) only if we parsed anything */
3091     if (s > PL_bufptr) {
3092         if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
3093             const char *const key = PL_lex_inpat ? "qr" : "q";
3094             const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3095             const char *type;
3096             STRLEN typelen;
3097
3098             if (PL_lex_inwhat == OP_TRANS) {
3099                 type = "tr";
3100                 typelen = 2;
3101             } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3102                 type = "s";
3103                 typelen = 1;
3104             } else  {
3105                 type = "qq";
3106                 typelen = 2;
3107             }
3108
3109             sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3110                                 type, typelen);
3111         }
3112         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3113     } else
3114         SvREFCNT_dec(sv);
3115     return s;
3116 }
3117
3118 /* S_intuit_more
3119  * Returns TRUE if there's more to the expression (e.g., a subscript),
3120  * FALSE otherwise.
3121  *
3122  * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3123  *
3124  * ->[ and ->{ return TRUE
3125  * { and [ outside a pattern are always subscripts, so return TRUE
3126  * if we're outside a pattern and it's not { or [, then return FALSE
3127  * if we're in a pattern and the first char is a {
3128  *   {4,5} (any digits around the comma) returns FALSE
3129  * if we're in a pattern and the first char is a [
3130  *   [] returns FALSE
3131  *   [SOMETHING] has a funky algorithm to decide whether it's a
3132  *      character class or not.  It has to deal with things like
3133  *      /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3134  * anything else returns TRUE
3135  */
3136
3137 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
3138
3139 STATIC int
3140 S_intuit_more(pTHX_ register char *s)
3141 {
3142     dVAR;
3143
3144     PERL_ARGS_ASSERT_INTUIT_MORE;
3145
3146     if (PL_lex_brackets)
3147         return TRUE;
3148     if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3149         return TRUE;
3150     if (*s != '{' && *s != '[')
3151         return FALSE;
3152     if (!PL_lex_inpat)
3153         return TRUE;
3154
3155     /* In a pattern, so maybe we have {n,m}. */
3156     if (*s == '{') {
3157         s++;
3158         if (!isDIGIT(*s))
3159             return TRUE;
3160         while (isDIGIT(*s))
3161             s++;
3162         if (*s == ',')
3163             s++;
3164         while (isDIGIT(*s))
3165             s++;
3166         if (*s == '}')
3167             return FALSE;
3168         return TRUE;
3169
3170     }
3171
3172     /* On the other hand, maybe we have a character class */
3173
3174     s++;
3175     if (*s == ']' || *s == '^')
3176         return FALSE;
3177     else {
3178         /* this is terrifying, and it works */
3179         int weight = 2;         /* let's weigh the evidence */
3180         char seen[256];
3181         unsigned char un_char = 255, last_un_char;
3182         const char * const send = strchr(s,']');
3183         char tmpbuf[sizeof PL_tokenbuf * 4];
3184
3185         if (!send)              /* has to be an expression */
3186             return TRUE;
3187
3188         Zero(seen,256,char);
3189         if (*s == '$')
3190             weight -= 3;
3191         else if (isDIGIT(*s)) {
3192             if (s[1] != ']') {
3193                 if (isDIGIT(s[1]) && s[2] == ']')
3194                     weight -= 10;
3195             }
3196             else
3197                 weight -= 100;
3198         }
3199         for (; s < send; s++) {
3200             last_un_char = un_char;
3201             un_char = (unsigned char)*s;
3202             switch (*s) {
3203             case '@':
3204             case '&':
3205             case '$':
3206                 weight -= seen[un_char] * 10;
3207                 if (isALNUM_lazy_if(s+1,UTF)) {
3208                     int len;
3209                     scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
3210                     len = (int)strlen(tmpbuf);
3211                     if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
3212                         weight -= 100;
3213                     else
3214                         weight -= 10;
3215                 }
3216                 else if (*s == '$' && s[1] &&
3217                   strchr("[#!%*<>()-=",s[1])) {
3218                     if (/*{*/ strchr("])} =",s[2]))
3219                         weight -= 10;
3220                     else
3221                         weight -= 1;
3222                 }
3223                 break;
3224             case '\\':
3225                 un_char = 254;
3226                 if (s[1]) {
3227                     if (strchr("wds]",s[1]))
3228                         weight += 100;
3229                     else if (seen[(U8)'\''] || seen[(U8)'"'])
3230                         weight += 1;
3231                     else if (strchr("rnftbxcav",s[1]))
3232                         weight += 40;
3233                     else if (isDIGIT(s[1])) {
3234                         weight += 40;
3235                         while (s[1] && isDIGIT(s[1]))
3236                             s++;
3237                     }
3238                 }
3239                 else
3240                     weight += 100;
3241                 break;
3242             case '-':
3243                 if (s[1] == '\\')
3244                     weight += 50;
3245                 if (strchr("aA01! ",last_un_char))
3246                     weight += 30;
3247                 if (strchr("zZ79~",s[1]))
3248                     weight += 30;
3249                 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3250                     weight -= 5;        /* cope with negative subscript */
3251                 break;
3252             default:
3253                 if (!isALNUM(last_un_char)
3254                     && !(last_un_char == '$' || last_un_char == '@'
3255                          || last_un_char == '&')
3256                     && isALPHA(*s) && s[1] && isALPHA(s[1])) {
3257                     char *d = tmpbuf;
3258                     while (isALPHA(*s))
3259                         *d++ = *s++;
3260                     *d = '\0';
3261                     if (keyword(tmpbuf, d - tmpbuf, 0))
3262                         weight -= 150;
3263                 }
3264                 if (un_char == last_un_char + 1)
3265                     weight += 5;
3266                 weight -= seen[un_char];
3267                 break;
3268             }
3269             seen[un_char]++;
3270         }
3271         if (weight >= 0)        /* probably a character class */
3272             return FALSE;
3273     }
3274
3275     return TRUE;
3276 }
3277
3278 /*
3279  * S_intuit_method
3280  *
3281  * Does all the checking to disambiguate
3282  *   foo bar
3283  * between foo(bar) and bar->foo.  Returns 0 if not a method, otherwise
3284  * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3285  *
3286  * First argument is the stuff after the first token, e.g. "bar".
3287  *
3288  * Not a method if bar is a filehandle.
3289  * Not a method if foo is a subroutine prototyped to take a filehandle.
3290  * Not a method if it's really "Foo $bar"
3291  * Method if it's "foo $bar"
3292  * Not a method if it's really "print foo $bar"
3293  * Method if it's really "foo package::" (interpreted as package->foo)
3294  * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3295  * Not a method if bar is a filehandle or package, but is quoted with
3296  *   =>
3297  */
3298
3299 STATIC int
3300 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
3301 {
3302     dVAR;
3303     char *s = start + (*start == '$');
3304     char tmpbuf[sizeof PL_tokenbuf];
3305     STRLEN len;
3306     GV* indirgv;
3307 #ifdef PERL_MAD
3308     int soff;
3309 #endif
3310
3311     PERL_ARGS_ASSERT_INTUIT_METHOD;
3312
3313     if (gv) {
3314         if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
3315             return 0;
3316         if (cv) {
3317             if (SvPOK(cv)) {
3318                 const char *proto = SvPVX_const(cv);
3319                 if (proto) {
3320                     if (*proto == ';')
3321                         proto++;
3322                     if (*proto == '*')
3323                         return 0;
3324                 }
3325             }
3326         } else
3327             gv = NULL;
3328     }
3329     s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
3330     /* start is the beginning of the possible filehandle/object,
3331      * and s is the end of it
3332      * tmpbuf is a copy of it
3333      */
3334
3335     if (*start == '$') {
3336         if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3337                 isUPPER(*PL_tokenbuf))
3338             return 0;
3339 #ifdef PERL_MAD
3340         len = start - SvPVX(PL_linestr);
3341 #endif
3342         s = PEEKSPACE(s);
3343 #ifdef PERL_MAD
3344         start = SvPVX(PL_linestr) + len;
3345 #endif
3346         PL_bufptr = start;
3347         PL_expect = XREF;
3348         return *s == '(' ? FUNCMETH : METHOD;
3349     }
3350     if (!keyword(tmpbuf, len, 0)) {
3351         if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
3352             len -= 2;
3353             tmpbuf[len] = '\0';
3354 #ifdef PERL_MAD
3355             soff = s - SvPVX(PL_linestr);
3356 #endif
3357             goto bare_package;
3358         }
3359         indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
3360         if (indirgv && GvCVu(indirgv))
3361             return 0;
3362         /* filehandle or package name makes it a method */
3363         if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
3364 #ifdef PERL_MAD
3365             soff = s - SvPVX(PL_linestr);
3366 #endif
3367             s = PEEKSPACE(s);
3368             if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
3369                 return 0;       /* no assumptions -- "=>" quotes bearword */
3370       bare_package:
3371             start_force(PL_curforce);
3372             NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
3373                                                   S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
3374             NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
3375             if (PL_madskills)
3376                 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
3377             PL_expect = XTERM;
3378             force_next(WORD);
3379             PL_bufptr = s;
3380 #ifdef PERL_MAD
3381             PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
3382 #endif
3383             return *s == '(' ? FUNCMETH : METHOD;
3384         }
3385     }
3386     return 0;
3387 }
3388
3389 /* Encoded script support. filter_add() effectively inserts a
3390  * 'pre-processing' function into the current source input stream.
3391  * Note that the filter function only applies to the current source file
3392  * (e.g., it will not affect files 'require'd or 'use'd by this one).
3393  *
3394  * The datasv parameter (which may be NULL) can be used to pass
3395  * private data to this instance of the filter. The filter function
3396  * can recover the SV using the FILTER_DATA macro and use it to
3397  * store private buffers and state information.
3398  *
3399  * The supplied datasv parameter is upgraded to a PVIO type
3400  * and the IoDIRP/IoANY field is used to store the function pointer,
3401  * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
3402  * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
3403  * private use must be set using malloc'd pointers.
3404  */
3405
3406 SV *
3407 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
3408 {
3409     dVAR;
3410     if (!funcp)
3411         return NULL;
3412
3413     if (!PL_parser)
3414         return NULL;
3415
3416     if (!PL_rsfp_filters)
3417         PL_rsfp_filters = newAV();
3418     if (!datasv)
3419         datasv = newSV(0);
3420     SvUPGRADE(datasv, SVt_PVIO);
3421     IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
3422     IoFLAGS(datasv) |= IOf_FAKE_DIRP;
3423     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
3424                           FPTR2DPTR(void *, IoANY(datasv)),
3425                           SvPV_nolen(datasv)));
3426     av_unshift(PL_rsfp_filters, 1);
3427     av_store(PL_rsfp_filters, 0, datasv) ;
3428     return(datasv);
3429 }
3430
3431
3432 /* Delete most recently added instance of this filter function. */
3433 void
3434 Perl_filter_del(pTHX_ filter_t funcp)
3435 {
3436     dVAR;
3437     SV *datasv;
3438
3439     PERL_ARGS_ASSERT_FILTER_DEL;
3440
3441 #ifdef DEBUGGING
3442     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
3443                           FPTR2DPTR(void*, funcp)));
3444 #endif
3445     if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
3446         return;
3447     /* if filter is on top of stack (usual case) just pop it off */
3448     datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
3449     if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
3450         IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
3451         IoANY(datasv) = (void *)NULL;
3452         sv_free(av_pop(PL_rsfp_filters));
3453
3454         return;
3455     }
3456     /* we need to search for the correct entry and clear it     */
3457     Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
3458 }
3459
3460
3461 /* Invoke the idxth filter function for the current rsfp.        */
3462 /* maxlen 0 = read one text line */
3463 I32
3464 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
3465 {
3466     dVAR;
3467     filter_t funcp;
3468     SV *datasv = NULL;
3469     /* This API is bad. It should have been using unsigned int for maxlen.
3470        Not sure if we want to change the API, but if not we should sanity
3471        check the value here.  */
3472     const unsigned int correct_length
3473         = maxlen < 0 ?
3474 #ifdef PERL_MICRO
3475         0x7FFFFFFF
3476 #else
3477         INT_MAX
3478 #endif
3479         : maxlen;
3480
3481     PERL_ARGS_ASSERT_FILTER_READ;
3482
3483     if (!PL_parser || !PL_rsfp_filters)
3484         return -1;
3485     if (idx > AvFILLp(PL_rsfp_filters)) {       /* Any more filters?    */
3486         /* Provide a default input filter to make life easy.    */
3487         /* Note that we append to the line. This is handy.      */
3488         DEBUG_P(PerlIO_printf(Perl_debug_log,
3489                               "filter_read %d: from rsfp\n", idx));
3490         if (correct_length) {
3491             /* Want a block */
3492             int len ;
3493             const int old_len = SvCUR(buf_sv);
3494
3495             /* ensure buf_sv is large enough */
3496             SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
3497             if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
3498                                    correct_length)) <= 0) {
3499                 if (PerlIO_error(PL_rsfp))
3500                     return -1;          /* error */
3501                 else
3502                     return 0 ;          /* end of file */
3503             }
3504             SvCUR_set(buf_sv, old_len + len) ;
3505             SvPVX(buf_sv)[old_len + len] = '\0';
3506         } else {
3507             /* Want a line */
3508             if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
3509                 if (PerlIO_error(PL_rsfp))
3510                     return -1;          /* error */
3511                 else
3512                     return 0 ;          /* end of file */
3513             }
3514         }
3515         return SvCUR(buf_sv);
3516     }
3517     /* Skip this filter slot if filter has been deleted */
3518     if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
3519         DEBUG_P(PerlIO_printf(Perl_debug_log,
3520                               "filter_read %d: skipped (filter deleted)\n",
3521                               idx));
3522         return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
3523     }
3524     /* Get function pointer hidden within datasv        */
3525     funcp = DPTR2FPTR(filter_t, IoANY(datasv));
3526     DEBUG_P(PerlIO_printf(Perl_debug_log,
3527                           "filter_read %d: via function %p (%s)\n",
3528                           idx, (void*)datasv, SvPV_nolen_const(datasv)));
3529     /* Call function. The function is expected to       */
3530     /* call "FILTER_READ(idx+1, buf_sv)" first.         */
3531     /* Return: <0:error, =0:eof, >0:not eof             */
3532     return (*funcp)(aTHX_ idx, buf_sv, correct_length);
3533 }
3534
3535 STATIC char *
3536 S_filter_gets(pTHX_ register SV *sv, STRLEN append)
3537 {
3538     dVAR;
3539
3540     PERL_ARGS_ASSERT_FILTER_GETS;
3541
3542 #ifdef PERL_CR_FILTER
3543     if (!PL_rsfp_filters) {
3544         filter_add(S_cr_textfilter,NULL);
3545     }
3546 #endif
3547     if (PL_rsfp_filters) {
3548         if (!append)
3549             SvCUR_set(sv, 0);   /* start with empty line        */
3550         if (FILTER_READ(0, sv, 0) > 0)
3551             return ( SvPVX(sv) ) ;
3552         else
3553             return NULL ;
3554     }
3555     else
3556         return (sv_gets(sv, PL_rsfp, append));
3557 }
3558
3559 STATIC HV *
3560 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
3561 {
3562     dVAR;
3563     GV *gv;
3564
3565     PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
3566
3567     if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
3568         return PL_curstash;
3569
3570     if (len > 2 &&
3571         (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
3572         (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
3573     {
3574         return GvHV(gv);                        /* Foo:: */
3575     }
3576
3577     /* use constant CLASS => 'MyClass' */
3578     gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
3579     if (gv && GvCV(gv)) {
3580         SV * const sv = cv_const_sv(GvCV(gv));
3581         if (sv)
3582             pkgname = SvPV_const(sv, len);
3583     }
3584
3585     return gv_stashpvn(pkgname, len, 0);
3586 }
3587
3588 /*
3589  * S_readpipe_override
3590  * Check whether readpipe() is overriden, and generates the appropriate
3591  * optree, provided sublex_start() is called afterwards.
3592  */
3593 STATIC void
3594 S_readpipe_override(pTHX)
3595 {
3596     GV **gvp;
3597     GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
3598     pl_yylval.ival = OP_BACKTICK;
3599     if ((gv_readpipe
3600                 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
3601             ||
3602             ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
3603              && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
3604              && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
3605     {
3606         PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
3607             append_elem(OP_LIST,
3608                 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
3609                 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
3610     }
3611 }
3612
3613 #ifdef PERL_MAD
3614  /*
3615  * Perl_madlex
3616  * The intent of this yylex wrapper is to minimize the changes to the
3617  * tokener when we aren't interested in collecting madprops.  It remains
3618  * to be seen how successful this strategy will be...
3619  */
3620
3621 int
3622 Perl_madlex(pTHX)
3623 {
3624     int optype;
3625     char *s = PL_bufptr;
3626
3627     /* make sure PL_thiswhite is initialized */
3628     PL_thiswhite = 0;
3629     PL_thismad = 0;
3630
3631     /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
3632     if (PL_pending_ident)
3633         return S_pending_ident(aTHX);
3634
3635     /* previous token ate up our whitespace? */
3636     if (!PL_lasttoke && PL_nextwhite) {
3637         PL_thiswhite = PL_nextwhite;
3638         PL_nextwhite = 0;
3639     }
3640
3641     /* isolate the token, and figure out where it is without whitespace */
3642     PL_realtokenstart = -1;
3643     PL_thistoken = 0;
3644     optype = yylex();
3645     s = PL_bufptr;
3646     assert(PL_curforce < 0);
3647
3648     if (!PL_thismad || PL_thismad->mad_key == '^') {    /* not forced already? */
3649         if (!PL_thistoken) {
3650             if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
3651                 PL_thistoken = newSVpvs("");
3652             else {
3653                 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
3654                 PL_thistoken = newSVpvn(tstart, s - tstart);
3655             }
3656         }
3657         if (PL_thismad) /* install head */
3658             CURMAD('X', PL_thistoken);
3659     }
3660
3661     /* last whitespace of a sublex? */
3662     if (optype == ')' && PL_endwhite) {
3663         CURMAD('X', PL_endwhite);
3664     }
3665
3666     if (!PL_thismad) {
3667
3668         /* if no whitespace and we're at EOF, bail.  Otherwise fake EOF below. */
3669         if (!PL_thiswhite && !PL_endwhite && !optype) {
3670             sv_free(PL_thistoken);
3671             PL_thistoken = 0;
3672             return 0;
3673         }
3674
3675         /* put off final whitespace till peg */
3676         if (optype == ';' && !PL_rsfp) {
3677             PL_nextwhite = PL_thiswhite;
3678             PL_thiswhite = 0;
3679         }
3680         else if (PL_thisopen) {
3681             CURMAD('q', PL_thisopen);
3682             if (PL_thistoken)
3683                 sv_free(PL_thistoken);
3684             PL_thistoken = 0;
3685         }
3686         else {
3687             /* Store actual token text as madprop X */
3688             CURMAD('X', PL_thistoken);
3689         }
3690
3691         if (PL_thiswhite) {
3692             /* add preceding whitespace as madprop _ */
3693             CURMAD('_', PL_thiswhite);
3694         }
3695
3696         if (PL_thisstuff) {
3697             /* add quoted material as madprop = */
3698             CURMAD('=', PL_thisstuff);
3699         }
3700
3701         if (PL_thisclose) {
3702             /* add terminating quote as madprop Q */
3703             CURMAD('Q', PL_thisclose);
3704         }
3705     }
3706
3707     /* special processing based on optype */
3708
3709     switch (optype) {
3710
3711     /* opval doesn't need a TOKEN since it can already store mp */
3712     case WORD:
3713     case METHOD:
3714     case FUNCMETH:
3715     case THING:
3716     case PMFUNC:
3717     case PRIVATEREF:
3718     case FUNC0SUB:
3719     case UNIOPSUB:
3720     case LSTOPSUB:
3721         if (pl_yylval.opval)
3722             append_madprops(PL_thismad, pl_yylval.opval, 0);
3723         PL_thismad = 0;
3724         return optype;
3725
3726     /* fake EOF */
3727     case 0:
3728         optype = PEG;
3729         if (PL_endwhite) {
3730             addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
3731             PL_endwhite = 0;
3732         }
3733         break;
3734
3735     case ']':
3736     case '}':
3737         if (PL_faketokens)
3738             break;
3739         /* remember any fake bracket that lexer is about to discard */
3740         if (PL_lex_brackets == 1 &&
3741             ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
3742         {
3743             s = PL_bufptr;
3744             while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3745                 s++;
3746             if (*s == '}') {
3747                 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
3748                 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3749                 PL_thiswhite = 0;
3750                 PL_bufptr = s - 1;
3751                 break;  /* don't bother looking for trailing comment */
3752             }
3753             else
3754                 s = PL_bufptr;
3755         }
3756         if (optype == ']')
3757             break;
3758         /* FALLTHROUGH */
3759
3760     /* attach a trailing comment to its statement instead of next token */
3761     case ';':
3762         if (PL_faketokens)
3763             break;
3764         if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
3765             s = PL_bufptr;
3766             while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3767                 s++;
3768             if (*s == '\n' || *s == '#') {
3769                 while (s < PL_bufend && *s != '\n')
3770                     s++;
3771                 if (s < PL_bufend)
3772                     s++;
3773                 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
3774                 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3775                 PL_thiswhite = 0;
3776                 PL_bufptr = s;
3777             }
3778         }
3779         break;
3780
3781     /* pval */
3782     case LABEL:
3783         break;
3784
3785     /* ival */
3786     default:
3787         break;
3788
3789     }
3790
3791     /* Create new token struct.  Note: opvals return early above. */
3792     pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
3793     PL_thismad = 0;
3794     return optype;
3795 }
3796 #endif
3797
3798 STATIC char *
3799 S_tokenize_use(pTHX_ int is_use, char *s) {
3800     dVAR;
3801
3802     PERL_ARGS_ASSERT_TOKENIZE_USE;
3803
3804     if (PL_expect != XSTATE)
3805         yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
3806                     is_use ? "use" : "no"));
3807     s = SKIPSPACE1(s);
3808     if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
3809         s = force_version(s, TRUE);
3810         if (*s == ';' || (s = SKIPSPACE1(s), *s == ';')) {
3811             start_force(PL_curforce);
3812             NEXTVAL_NEXTTOKE.opval = NULL;
3813             force_next(WORD);
3814         }
3815         else if (*s == 'v') {
3816             s = force_word(s,WORD,FALSE,TRUE,FALSE);
3817             s = force_version(s, FALSE);
3818         }
3819     }
3820     else {
3821         s = force_word(s,WORD,FALSE,TRUE,FALSE);
3822         s = force_version(s, FALSE);
3823     }
3824     pl_yylval.ival = is_use;
3825     return s;
3826 }
3827 #ifdef DEBUGGING
3828     static const char* const exp_name[] =
3829         { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
3830           "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
3831         };
3832 #endif
3833
3834 /*
3835   yylex
3836
3837   Works out what to call the token just pulled out of the input
3838   stream.  The yacc parser takes care of taking the ops we return and
3839   stitching them into a tree.
3840
3841   Returns:
3842     PRIVATEREF
3843
3844   Structure:
3845       if read an identifier
3846           if we're in a my declaration
3847               croak if they tried to say my($foo::bar)
3848               build the ops for a my() declaration
3849           if it's an access to a my() variable
3850               are we in a sort block?
3851                   croak if my($a); $a <=> $b
3852               build ops for access to a my() variable
3853           if in a dq string, and they've said @foo and we can't find @foo
3854               croak
3855           build ops for a bareword
3856       if we already built the token before, use it.
3857 */
3858
3859
3860 #ifdef __SC__
3861 #pragma segment Perl_yylex
3862 #endif
3863 int
3864 Perl_yylex(pTHX)
3865 {
3866     dVAR;
3867     register char *s = PL_bufptr;
3868     register char *d;
3869     STRLEN len;
3870     bool bof = FALSE;
3871     U32 fake_eof = 0;
3872
3873     /* orig_keyword, gvp, and gv are initialized here because
3874      * jump to the label just_a_word_zero can bypass their
3875      * initialization later. */
3876     I32 orig_keyword = 0;
3877     GV *gv = NULL;
3878     GV **gvp = NULL;
3879
3880     DEBUG_T( {
3881         SV* tmp = newSVpvs("");
3882         PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
3883             (IV)CopLINE(PL_curcop),
3884             lex_state_names[PL_lex_state],
3885             exp_name[PL_expect],
3886             pv_display(tmp, s, strlen(s), 0, 60));
3887         SvREFCNT_dec(tmp);
3888     } );
3889     /* check if there's an identifier for us to look at */
3890     if (PL_pending_ident)
3891         return REPORT(S_pending_ident(aTHX));
3892
3893     /* no identifier pending identification */
3894
3895     switch (PL_lex_state) {
3896 #ifdef COMMENTARY
3897     case LEX_NORMAL:            /* Some compilers will produce faster */
3898     case LEX_INTERPNORMAL:      /* code if we comment these out. */
3899         break;
3900 #endif
3901
3902     /* when we've already built the next token, just pull it out of the queue */
3903     case LEX_KNOWNEXT:
3904 #ifdef PERL_MAD
3905         PL_lasttoke--;
3906         pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
3907         if (PL_madskills) {
3908             PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
3909             PL_nexttoke[PL_lasttoke].next_mad = 0;
3910             if (PL_thismad && PL_thismad->mad_key == '_') {
3911                 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
3912                 PL_thismad->mad_val = 0;
3913                 mad_free(PL_thismad);
3914                 PL_thismad = 0;
3915             }
3916         }
3917         if (!PL_lasttoke) {
3918             PL_lex_state = PL_lex_defer;
3919             PL_expect = PL_lex_expect;
3920             PL_lex_defer = LEX_NORMAL;
3921             if (!PL_nexttoke[PL_lasttoke].next_type)
3922                 return yylex();
3923         }
3924 #else
3925         PL_nexttoke--;
3926         pl_yylval = PL_nextval[PL_nexttoke];
3927         if (!PL_nexttoke) {
3928             PL_lex_state = PL_lex_defer;
3929             PL_expect = PL_lex_expect;
3930             PL_lex_defer = LEX_NORMAL;
3931         }
3932 #endif
3933 #ifdef PERL_MAD
3934         /* FIXME - can these be merged?  */
3935         return(PL_nexttoke[PL_lasttoke].next_type);
3936 #else
3937         return REPORT(PL_nexttype[PL_nexttoke]);
3938 #endif
3939
3940     /* interpolated case modifiers like \L \U, including \Q and \E.
3941        when we get here, PL_bufptr is at the \
3942     */
3943     case LEX_INTERPCASEMOD:
3944 #ifdef DEBUGGING
3945         if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
3946             Perl_croak(aTHX_ "panic: INTERPCASEMOD");
3947 #endif
3948         /* handle \E or end of string */
3949         if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
3950             /* if at a \E */
3951             if (PL_lex_casemods) {
3952                 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3953                 PL_lex_casestack[PL_lex_casemods] = '\0';
3954
3955                 if (PL_bufptr != PL_bufend
3956                     && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3957                     PL_bufptr += 2;
3958                     PL_lex_state = LEX_INTERPCONCAT;
3959 #ifdef PERL_MAD
3960                     if (PL_madskills)
3961                         PL_thistoken = newSVpvs("\\E");
3962 #endif
3963                 }
3964                 return REPORT(')');
3965             }
3966 #ifdef PERL_MAD
3967             while (PL_bufptr != PL_bufend &&
3968               PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
3969                 if (!PL_thiswhite)
3970                     PL_thiswhite = newSVpvs("");
3971                 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
3972                 PL_bufptr += 2;
3973             }
3974 #else
3975             if (PL_bufptr != PL_bufend)
3976                 PL_bufptr += 2;
3977 #endif
3978             PL_lex_state = LEX_INTERPCONCAT;
3979             return yylex();
3980         }
3981         else {
3982             DEBUG_T({ PerlIO_printf(Perl_debug_log,
3983               "### Saw case modifier\n"); });
3984             s = PL_bufptr + 1;
3985             if (s[1] == '\\' && s[2] == 'E') {
3986 #ifdef PERL_MAD
3987                 if (!PL_thiswhite)
3988                     PL_thiswhite = newSVpvs("");
3989                 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
3990 #endif
3991                 PL_bufptr = s + 3;
3992                 PL_lex_state = LEX_INTERPCONCAT;
3993                 return yylex();
3994             }
3995             else {
3996                 I32 tmp;
3997                 if (!PL_madskills) /* when just compiling don't need correct */
3998                     if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
3999                         tmp = *s, *s = s[2], s[2] = (char)tmp;  /* misordered... */
4000                 if ((*s == 'L' || *s == 'U') &&
4001                     (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
4002                     PL_lex_casestack[--PL_lex_casemods] = '\0';
4003                     return REPORT(')');
4004                 }
4005                 if (PL_lex_casemods > 10)
4006                     Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4007                 PL_lex_casestack[PL_lex_casemods++] = *s;
4008                 PL_lex_casestack[PL_lex_casemods] = '\0';
4009                 PL_lex_state = LEX_INTERPCONCAT;
4010                 start_force(PL_curforce);
4011                 NEXTVAL_NEXTTOKE.ival = 0;
4012                 force_next('(');
4013                 start_force(PL_curforce);
4014                 if (*s == 'l')
4015                     NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4016                 else if (*s == 'u')
4017                     NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4018                 else if (*s == 'L')
4019                     NEXTVAL_NEXTTOKE.ival = OP_LC;
4020                 else if (*s == 'U')
4021                     NEXTVAL_NEXTTOKE.ival = OP_UC;
4022                 else if (*s == 'Q')
4023                     NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4024                 else
4025                     Perl_croak(aTHX_ "panic: yylex");
4026                 if (PL_madskills) {
4027                     SV* const tmpsv = newSVpvs("\\ ");
4028                     /* replace the space with the character we want to escape
4029                      */
4030                     SvPVX(tmpsv)[1] = *s;
4031                     curmad('_', tmpsv);
4032                 }
4033                 PL_bufptr = s + 1;
4034             }
4035             force_next(FUNC);
4036             if (PL_lex_starts) {
4037                 s = PL_bufptr;
4038                 PL_lex_starts = 0;
4039 #ifdef PERL_MAD
4040                 if (PL_madskills) {
4041                     if (PL_thistoken)
4042                         sv_free(PL_thistoken);
4043                     PL_thistoken = newSVpvs("");
4044                 }
4045 #endif
4046                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4047                 if (PL_lex_casemods == 1 && PL_lex_inpat)
4048                     OPERATOR(',');
4049                 else
4050                     Aop(OP_CONCAT);
4051             }
4052             else
4053                 return yylex();
4054         }
4055
4056     case LEX_INTERPPUSH:
4057         return REPORT(sublex_push());
4058
4059     case LEX_INTERPSTART:
4060         if (PL_bufptr == PL_bufend)
4061             return REPORT(sublex_done());
4062         DEBUG_T({ PerlIO_printf(Perl_debug_log,
4063               "### Interpolated variable\n"); });
4064         PL_expect = XTERM;
4065         PL_lex_dojoin = (*PL_bufptr == '@');
4066         PL_lex_state = LEX_INTERPNORMAL;
4067         if (PL_lex_dojoin) {
4068             start_force(PL_curforce);
4069             NEXTVAL_NEXTTOKE.ival = 0;
4070             force_next(',');
4071             start_force(PL_curforce);
4072             force_ident("\"", '$');
4073             start_force(PL_curforce);
4074             NEXTVAL_NEXTTOKE.ival = 0;
4075             force_next('$');
4076             start_force(PL_curforce);
4077             NEXTVAL_NEXTTOKE.ival = 0;
4078             force_next('(');
4079             start_force(PL_curforce);
4080             NEXTVAL_NEXTTOKE.ival = OP_JOIN;    /* emulate join($", ...) */
4081             force_next(FUNC);
4082         }
4083         if (PL_lex_starts++) {
4084             s = PL_bufptr;
4085 #ifdef PERL_MAD
4086             if (PL_madskills) {
4087                 if (PL_thistoken)
4088                     sv_free(PL_thistoken);
4089                 PL_thistoken = newSVpvs("");
4090             }
4091 #endif
4092             /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4093             if (!PL_lex_casemods && PL_lex_inpat)
4094                 OPERATOR(',');
4095             else
4096                 Aop(OP_CONCAT);
4097         }
4098         return yylex();
4099
4100     case LEX_INTERPENDMAYBE:
4101         if (intuit_more(PL_bufptr)) {
4102             PL_lex_state = LEX_INTERPNORMAL;    /* false alarm, more expr */
4103             break;
4104         }
4105         /* FALL THROUGH */
4106
4107     case LEX_INTERPEND:
4108         if (PL_lex_dojoin) {
4109             PL_lex_dojoin = FALSE;
4110             PL_lex_state = LEX_INTERPCONCAT;
4111 #ifdef PERL_MAD
4112             if (PL_madskills) {
4113                 if (PL_thistoken)
4114                     sv_free(PL_thistoken);
4115                 PL_thistoken = newSVpvs("");
4116             }
4117 #endif
4118             return REPORT(')');
4119         }
4120         if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
4121             && SvEVALED(PL_lex_repl))
4122         {
4123             if (PL_bufptr != PL_bufend)
4124                 Perl_croak(aTHX_ "Bad evalled substitution pattern");
4125             PL_lex_repl = NULL;
4126         }
4127         /* FALLTHROUGH */
4128     case LEX_INTERPCONCAT:
4129 #ifdef DEBUGGING
4130         if (PL_lex_brackets)
4131             Perl_croak(aTHX_ "panic: INTERPCONCAT");
4132 #endif
4133         if (PL_bufptr == PL_bufend)
4134             return REPORT(sublex_done());
4135
4136         if (SvIVX(PL_linestr) == '\'') {
4137             SV *sv = newSVsv(PL_linestr);
4138             if (!PL_lex_inpat)
4139                 sv = tokeq(sv);
4140             else if ( PL_hints & HINT_NEW_RE )
4141                 sv = new_constant(NULL, 0, "qr", sv, sv, "q", 1);
4142             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
4143             s = PL_bufend;
4144         }
4145         else {
4146             s = scan_const(PL_bufptr);
4147             if (*s == '\\')
4148                 PL_lex_state = LEX_INTERPCASEMOD;
4149             else
4150                 PL_lex_state = LEX_INTERPSTART;
4151         }
4152
4153         if (s != PL_bufptr) {
4154             start_force(PL_curforce);
4155             if (PL_madskills) {
4156                 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
4157             }
4158             NEXTVAL_NEXTTOKE = pl_yylval;
4159             PL_expect = XTERM;
4160             force_next(THING);
4161             if (PL_lex_starts++) {
4162 #ifdef PERL_MAD
4163                 if (PL_madskills) {
4164                     if (PL_thistoken)
4165                         sv_free(PL_thistoken);
4166                     PL_thistoken = newSVpvs("");
4167                 }
4168 #endif
4169                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4170                 if (!PL_lex_casemods && PL_lex_inpat)
4171                     OPERATOR(',');
4172                 else
4173                     Aop(OP_CONCAT);
4174             }
4175             else {
4176                 PL_bufptr = s;
4177                 return yylex();
4178             }
4179         }
4180
4181         return yylex();
4182     case LEX_FORMLINE:
4183         PL_lex_state = LEX_NORMAL;
4184         s = scan_formline(PL_bufptr);
4185         if (!PL_lex_formbrack)
4186             goto rightbracket;
4187         OPERATOR(';');
4188     }
4189
4190     s = PL_bufptr;
4191     PL_oldoldbufptr = PL_oldbufptr;
4192     PL_oldbufptr = s;
4193
4194   retry:
4195 #ifdef PERL_MAD
4196     if (PL_thistoken) {
4197         sv_free(PL_thistoken);
4198         PL_thistoken = 0;
4199     }
4200     PL_realtokenstart = s - SvPVX(PL_linestr);  /* assume but undo on ws */
4201 #endif
4202     switch (*s) {
4203     default:
4204         if (isIDFIRST_lazy_if(s,UTF))
4205             goto keylookup;
4206         {
4207         unsigned char c = *s;
4208         len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
4209         if (len > UNRECOGNIZED_PRECEDE_COUNT) {
4210             d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
4211         } else {
4212             d = PL_linestart;
4213         }
4214         *s = '\0';
4215         Perl_croak(aTHX_ "Unrecognized character \\x%02X; marked by <-- HERE after %s<-- HERE near column %d", c, d, (int) len + 1);
4216     }
4217     case 4:
4218     case 26:
4219         goto fake_eof;                  /* emulate EOF on ^D or ^Z */
4220     case 0:
4221 #ifdef PERL_MAD
4222         if (PL_madskills)
4223             PL_faketokens = 0;
4224 #endif
4225         if (!PL_rsfp) {
4226             PL_last_uni = 0;
4227             PL_last_lop = 0;
4228             if (PL_lex_brackets) {
4229                 yyerror((const char *)
4230                         (PL_lex_formbrack
4231                          ? "Format not terminated"
4232                          : "Missing right curly or square bracket"));
4233             }
4234             DEBUG_T( { PerlIO_printf(Perl_debug_log,
4235                         "### Tokener got EOF\n");
4236             } );
4237             TOKEN(0);
4238         }
4239         if (s++ < PL_bufend)
4240             goto retry;                 /* ignore stray nulls */
4241         PL_last_uni = 0;
4242         PL_last_lop = 0;
4243         if (!PL_in_eval && !PL_preambled) {
4244             PL_preambled = TRUE;
4245 #ifdef PERL_MAD
4246             if (PL_madskills)
4247                 PL_faketokens = 1;
4248 #endif
4249             if (PL_perldb) {
4250                 /* Generate a string of Perl code to load the debugger.
4251                  * If PERL5DB is set, it will return the contents of that,
4252                  * otherwise a compile-time require of perl5db.pl.  */
4253
4254                 const char * const pdb = PerlEnv_getenv("PERL5DB");
4255
4256                 if (pdb) {
4257                     sv_setpv(PL_linestr, pdb);
4258                     sv_catpvs(PL_linestr,";");
4259                 } else {
4260                     SETERRNO(0,SS_NORMAL);
4261                     sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
4262                 }
4263             } else
4264                 sv_setpvs(PL_linestr,"");
4265             if (PL_preambleav) {
4266                 SV **svp = AvARRAY(PL_preambleav);
4267                 SV **const end = svp + AvFILLp(PL_preambleav);
4268                 while(svp <= end) {
4269                     sv_catsv(PL_linestr, *svp);
4270                     ++svp;
4271                     sv_catpvs(PL_linestr, ";");
4272                 }
4273                 sv_free(MUTABLE_SV(PL_preambleav));
4274                 PL_preambleav = NULL;
4275             }
4276             if (PL_minus_E)
4277                 sv_catpvs(PL_linestr,
4278                           "use feature ':5." STRINGIFY(PERL_VERSION) "';");
4279             if (PL_minus_n || PL_minus_p) {
4280                 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
4281                 if (PL_minus_l)
4282                     sv_catpvs(PL_linestr,"chomp;");
4283                 if (PL_minus_a) {
4284                     if (PL_minus_F) {
4285                         if ((*PL_splitstr == '/' || *PL_splitstr == '\''
4286                              || *PL_splitstr == '"')
4287                               && strchr(PL_splitstr + 1, *PL_splitstr))
4288                             Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
4289                         else {
4290                             /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
4291                                bytes can be used as quoting characters.  :-) */
4292                             const char *splits = PL_splitstr;
4293                             sv_catpvs(PL_linestr, "our @F=split(q\0");
4294                             do {
4295                                 /* Need to \ \s  */
4296                                 if (*splits == '\\')
4297                                     sv_catpvn(PL_linestr, splits, 1);
4298                                 sv_catpvn(PL_linestr, splits, 1);
4299                             } while (*splits++);
4300                             /* This loop will embed the trailing NUL of
4301                                PL_linestr as the last thing it does before
4302                                terminating.  */
4303                             sv_catpvs(PL_linestr, ");");
4304                         }
4305                     }
4306                     else
4307                         sv_catpvs(PL_linestr,"our @F=split(' ');");
4308                 }
4309             }
4310             sv_catpvs(PL_linestr, "\n");
4311             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4312             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4313             PL_last_lop = PL_last_uni = NULL;
4314             if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
4315                 update_debugger_info(PL_linestr, NULL, 0);
4316             goto retry;
4317         }
4318         do {
4319             fake_eof = 0;
4320             bof = PL_rsfp ? TRUE : FALSE;
4321             if (0) {
4322               fake_eof:
4323                 fake_eof = LEX_FAKE_EOF;
4324             }
4325             PL_bufptr = PL_bufend;
4326             CopLINE_inc(PL_curcop);
4327             if (!lex_next_chunk(fake_eof)) {
4328                 CopLINE_dec(PL_curcop);
4329                 s = PL_bufptr;
4330                 TOKEN(';');     /* not infinite loop because rsfp is NULL now */
4331             }
4332             CopLINE_dec(PL_curcop);
4333 #ifdef PERL_MAD
4334             if (!PL_rsfp)
4335                 PL_realtokenstart = -1;
4336 #endif
4337             s = PL_bufptr;
4338             /* If it looks like the start of a BOM or raw UTF-16,
4339              * check if it in fact is. */
4340             if (bof && PL_rsfp &&
4341                      (*s == 0 ||
4342                       *(U8*)s == 0xEF ||
4343                       *(U8*)s >= 0xFE ||
4344                       s[1] == 0)) {
4345                 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
4346                 if (bof) {
4347                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4348                     s = swallow_bom((U8*)s);
4349                 }
4350             }
4351             if (PL_doextract) {
4352                 /* Incest with pod. */
4353 #ifdef PERL_MAD
4354                 if (PL_madskills)
4355                     sv_catsv(PL_thiswhite, PL_linestr);
4356 #endif
4357                 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
4358                     sv_setpvs(PL_linestr, "");
4359                     PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4360                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4361                     PL_last_lop = PL_last_uni = NULL;
4362                     PL_doextract = FALSE;
4363                 }
4364             }
4365             incline(s);
4366         } while (PL_doextract);
4367         PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
4368         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4369         PL_last_lop = PL_last_uni = NULL;
4370         if (CopLINE(PL_curcop) == 1) {
4371             while (s < PL_bufend && isSPACE(*s))
4372                 s++;
4373             if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
4374                 s++;
4375 #ifdef PERL_MAD
4376             if (PL_madskills)
4377                 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
4378 #endif
4379             d = NULL;
4380             if (!PL_in_eval) {
4381                 if (*s == '#' && *(s+1) == '!')
4382                     d = s + 2;
4383 #ifdef ALTERNATE_SHEBANG
4384                 else {
4385                     static char const as[] = ALTERNATE_SHEBANG;
4386                     if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
4387                         d = s + (sizeof(as) - 1);
4388                 }
4389 #endif /* ALTERNATE_SHEBANG */
4390             }
4391             if (d) {
4392                 char *ipath;
4393                 char *ipathend;
4394
4395                 while (isSPACE(*d))
4396                     d++;
4397                 ipath = d;
4398                 while (*d && !isSPACE(*d))
4399                     d++;
4400                 ipathend = d;
4401
4402 #ifdef ARG_ZERO_IS_SCRIPT
4403                 if (ipathend > ipath) {
4404                     /*
4405                      * HP-UX (at least) sets argv[0] to the script name,
4406                      * which makes $^X incorrect.  And Digital UNIX and Linux,
4407                      * at least, set argv[0] to the basename of the Perl
4408                      * interpreter. So, having found "#!", we'll set it right.
4409                      */
4410                     SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
4411                                                     SVt_PV)); /* $^X */
4412                     assert(SvPOK(x) || SvGMAGICAL(x));
4413                     if (sv_eq(x, CopFILESV(PL_curcop))) {
4414                         sv_setpvn(x, ipath, ipathend - ipath);
4415                         SvSETMAGIC(x);
4416                     }
4417                     else {
4418                         STRLEN blen;
4419                         STRLEN llen;
4420                         const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
4421                         const char * const lstart = SvPV_const(x,llen);
4422                         if (llen < blen) {
4423                             bstart += blen - llen;
4424                             if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
4425                                 sv_setpvn(x, ipath, ipathend - ipath);
4426                                 SvSETMAGIC(x);
4427                             }
4428                         }
4429                     }
4430                     TAINT_NOT;  /* $^X is always tainted, but that's OK */
4431                 }
4432 #endif /* ARG_ZERO_IS_SCRIPT */
4433
4434                 /*
4435                  * Look for options.
4436                  */
4437                 d = instr(s,"perl -");
4438                 if (!d) {
4439                     d = instr(s,"perl");
4440 #if defined(DOSISH)
4441                     /* avoid getting into infinite loops when shebang
4442                      * line contains "Perl" rather than "perl" */
4443                     if (!d) {
4444                         for (d = ipathend-4; d >= ipath; --d) {
4445                             if ((*d == 'p' || *d == 'P')
4446                                 && !ibcmp(d, "perl", 4))
4447                             {
4448                                 break;
4449                             }
4450                         }
4451                         if (d < ipath)
4452                             d = NULL;
4453                     }
4454 #endif
4455                 }
4456 #ifdef ALTERNATE_SHEBANG
4457                 /*
4458                  * If the ALTERNATE_SHEBANG on this system starts with a
4459                  * character that can be part of a Perl expression, then if
4460                  * we see it but not "perl", we're probably looking at the
4461                  * start of Perl code, not a request to hand off to some
4462                  * other interpreter.  Similarly, if "perl" is there, but
4463                  * not in the first 'word' of the line, we assume the line
4464                  * contains the start of the Perl program.
4465                  */
4466                 if (d && *s != '#') {
4467                     const char *c = ipath;
4468                     while (*c && !strchr("; \t\r\n\f\v#", *c))
4469                         c++;
4470                     if (c < d)
4471                         d = NULL;       /* "perl" not in first word; ignore */
4472                     else
4473                         *s = '#';       /* Don't try to parse shebang line */
4474                 }
4475 #endif /* ALTERNATE_SHEBANG */
4476                 if (!d &&
4477                     *s == '#' &&
4478                     ipathend > ipath &&
4479                     !PL_minus_c &&
4480                     !instr(s,"indir") &&
4481                     instr(PL_origargv[0],"perl"))
4482                 {
4483                     dVAR;
4484                     char **newargv;
4485
4486                     *ipathend = '\0';
4487                     s = ipathend + 1;
4488                     while (s < PL_bufend && isSPACE(*s))
4489                         s++;
4490                     if (s < PL_bufend) {
4491                         Newx(newargv,PL_origargc+3,char*);
4492                         newargv[1] = s;
4493                         while (s < PL_bufend && !isSPACE(*s))
4494                             s++;
4495                         *s = '\0';
4496                         Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
4497                     }
4498                     else
4499                         newargv = PL_origargv;
4500                     newargv[0] = ipath;
4501                     PERL_FPU_PRE_EXEC
4502                     PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
4503                     PERL_FPU_POST_EXEC
4504                     Perl_croak(aTHX_ "Can't exec %s", ipath);
4505                 }
4506                 if (d) {
4507                     while (*d && !isSPACE(*d))
4508                         d++;
4509                     while (SPACE_OR_TAB(*d))
4510                         d++;
4511
4512                     if (*d++ == '-') {
4513                         const bool switches_done = PL_doswitches;
4514                         const U32 oldpdb = PL_perldb;
4515                         const bool oldn = PL_minus_n;
4516                         const bool oldp = PL_minus_p;
4517                         const char *d1 = d;
4518
4519                         do {
4520                             bool baduni = FALSE;
4521                             if (*d1 == 'C') {
4522                                 const char *d2 = d1 + 1;
4523                                 if (parse_unicode_opts((const char **)&d2)
4524                                     != PL_unicode)
4525                                     baduni = TRUE;
4526                             }
4527                             if (baduni || *d1 == 'M' || *d1 == 'm') {
4528                                 const char * const m = d1;
4529                                 while (*d1 && !isSPACE(*d1))
4530                                     d1++;
4531                                 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
4532                                       (int)(d1 - m), m);
4533                             }
4534                             d1 = moreswitches(d1);
4535                         } while (d1);
4536                         if (PL_doswitches && !switches_done) {
4537                             int argc = PL_origargc;
4538                             char **argv = PL_origargv;
4539                             do {
4540                                 argc--,argv++;
4541                             } while (argc && argv[0][0] == '-' && argv[0][1]);
4542                             init_argv_symbols(argc,argv);
4543                         }
4544                         if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
4545                             ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
4546                               /* if we have already added "LINE: while (<>) {",
4547                                  we must not do it again */
4548                         {
4549                             sv_setpvs(PL_linestr, "");
4550                             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4551                             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4552                             PL_last_lop = PL_last_uni = NULL;
4553                             PL_preambled = FALSE;
4554                             if (PERLDB_LINE || PERLDB_SAVESRC)
4555                                 (void)gv_fetchfile(PL_origfilename);
4556                             goto retry;
4557                         }
4558                     }
4559                 }
4560             }
4561         }
4562         if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4563             PL_bufptr = s;
4564             PL_lex_state = LEX_FORMLINE;
4565             return yylex();
4566         }
4567         goto retry;
4568     case '\r':
4569 #ifdef PERL_STRICT_CR
4570         Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4571         Perl_croak(aTHX_
4572       "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
4573 #endif
4574     case ' ': case '\t': case '\f': case 013:
4575 #ifdef PERL_MAD
4576         PL_realtokenstart = -1;
4577         if (!PL_thiswhite)
4578             PL_thiswhite = newSVpvs("");
4579         sv_catpvn(PL_thiswhite, s, 1);
4580 #endif
4581         s++;
4582         goto retry;
4583     case '#':
4584     case '\n':
4585 #ifdef PERL_MAD
4586         PL_realtokenstart = -1;
4587         if (PL_madskills)
4588             PL_faketokens = 0;
4589 #endif
4590         if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
4591             if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
4592                 /* handle eval qq[#line 1 "foo"\n ...] */
4593                 CopLINE_dec(PL_curcop);
4594                 incline(s);
4595             }
4596             if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
4597                 s = SKIPSPACE0(s);
4598                 if (!PL_in_eval || PL_rsfp)
4599                     incline(s);
4600             }
4601             else {
4602                 d = s;
4603                 while (d < PL_bufend && *d != '\n')
4604                     d++;
4605                 if (d < PL_bufend)
4606                     d++;
4607                 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
4608                   Perl_croak(aTHX_ "panic: input overflow");
4609 #ifdef PERL_MAD
4610                 if (PL_madskills)
4611                     PL_thiswhite = newSVpvn(s, d - s);
4612 #endif
4613                 s = d;
4614                 incline(s);
4615             }
4616             if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4617                 PL_bufptr = s;
4618                 PL_lex_state = LEX_FORMLINE;
4619                 return yylex();
4620             }
4621         }
4622         else {
4623 #ifdef PERL_MAD
4624             if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
4625                 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
4626                     PL_faketokens = 0;
4627                     s = SKIPSPACE0(s);
4628                     TOKEN(PEG); /* make sure any #! line is accessible */
4629                 }
4630                 s = SKIPSPACE0(s);
4631             }
4632             else {
4633 /*              if (PL_madskills && PL_lex_formbrack) { */
4634                     d = s;
4635                     while (d < PL_bufend && *d != '\n')
4636                         d++;
4637                     if (d < PL_bufend)
4638                         d++;
4639                     else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
4640                       Perl_croak(aTHX_ "panic: input overflow");
4641                     if (PL_madskills && CopLINE(PL_curcop) >= 1) {
4642                         if (!PL_thiswhite)
4643                             PL_thiswhite = newSVpvs("");
4644                         if (CopLINE(PL_curcop) == 1) {
4645                             sv_setpvs(PL_thiswhite, "");
4646                             PL_faketokens = 0;
4647                         }
4648                         sv_catpvn(PL_thiswhite, s, d - s);
4649                     }
4650                     s = d;
4651 /*              }
4652                 *s = '\0';
4653                 PL_bufend = s; */
4654             }
4655 #else
4656             *s = '\0';
4657             PL_bufend = s;
4658 #endif
4659         }
4660         goto retry;
4661     case '-':
4662         if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
4663             I32 ftst = 0;
4664             char tmp;
4665
4666             s++;
4667             PL_bufptr = s;
4668             tmp = *s++;
4669
4670             while (s < PL_bufend && SPACE_OR_TAB(*s))
4671                 s++;
4672
4673             if (strnEQ(s,"=>",2)) {
4674                 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
4675                 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
4676                 OPERATOR('-');          /* unary minus */
4677             }
4678             PL_last_uni = PL_oldbufptr;
4679             switch (tmp) {
4680             case 'r': ftst = OP_FTEREAD;        break;
4681             case 'w': ftst = OP_FTEWRITE;       break;
4682             case 'x': ftst = OP_FTEEXEC;        break;
4683             case 'o': ftst = OP_FTEOWNED;       break;
4684             case 'R': ftst = OP_FTRREAD;        break;
4685             case 'W': ftst = OP_FTRWRITE;       break;
4686             case 'X': ftst = OP_FTREXEC;        break;
4687             case 'O': ftst = OP_FTROWNED;       break;
4688             case 'e': ftst = OP_FTIS;           break;
4689             case 'z': ftst = OP_FTZERO;         break;
4690             case 's': ftst = OP_FTSIZE;         break;
4691             case 'f': ftst = OP_FTFILE;         break;
4692             case 'd': ftst = OP_FTDIR;          break;
4693             case 'l': ftst = OP_FTLINK;         break;
4694             case 'p': ftst = OP_FTPIPE;         break;
4695             case 'S': ftst = OP_FTSOCK;         break;
4696             case 'u': ftst = OP_FTSUID;         break;
4697             case 'g': ftst = OP_FTSGID;         break;
4698             case 'k': ftst = OP_FTSVTX;         break;
4699             case 'b': ftst = OP_FTBLK;          break;
4700             case 'c': ftst = OP_FTCHR;          break;
4701             case 't': ftst = OP_FTTTY;          break;
4702             case 'T': ftst = OP_FTTEXT;         break;
4703             case 'B': ftst = OP_FTBINARY;       break;
4704             case 'M': case 'A': case 'C':
4705                 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
4706                 switch (tmp) {
4707                 case 'M': ftst = OP_FTMTIME;    break;
4708                 case 'A': ftst = OP_FTATIME;    break;
4709                 case 'C': ftst = OP_FTCTIME;    break;
4710                 default:                        break;
4711                 }
4712                 break;
4713             default:
4714                 break;
4715             }
4716             if (ftst) {
4717                 PL_last_lop_op = (OPCODE)ftst;
4718                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
4719                         "### Saw file test %c\n", (int)tmp);
4720                 } );
4721                 FTST(ftst);
4722             }
4723             else {
4724                 /* Assume it was a minus followed by a one-letter named
4725                  * subroutine call (or a -bareword), then. */
4726                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
4727                         "### '-%c' looked like a file test but was not\n",
4728                         (int) tmp);
4729                 } );
4730                 s = --PL_bufptr;
4731             }
4732         }
4733         {
4734             const char tmp = *s++;
4735             if (*s == tmp) {
4736                 s++;
4737                 if (PL_expect == XOPERATOR)
4738                     TERM(POSTDEC);
4739                 else
4740                     OPERATOR(PREDEC);
4741             }
4742             else if (*s == '>') {
4743                 s++;
4744                 s = SKIPSPACE1(s);
4745                 if (isIDFIRST_lazy_if(s,UTF)) {
4746                     s = force_word(s,METHOD,FALSE,TRUE,FALSE);
4747                     TOKEN(ARROW);
4748                 }
4749                 else if (*s == '$')
4750                     OPERATOR(ARROW);
4751                 else
4752                     TERM(ARROW);
4753             }
4754             if (PL_expect == XOPERATOR)
4755                 Aop(OP_SUBTRACT);
4756             else {
4757                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
4758                     check_uni();
4759                 OPERATOR('-');          /* unary minus */
4760             }
4761         }
4762
4763     case '+':
4764         {
4765             const char tmp = *s++;
4766             if (*s == tmp) {
4767                 s++;
4768                 if (PL_expect == XOPERATOR)
4769                     TERM(POSTINC);
4770                 else
4771                     OPERATOR(PREINC);
4772             }
4773             if (PL_expect == XOPERATOR)
4774                 Aop(OP_ADD);
4775             else {
4776                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
4777                     check_uni();
4778                 OPERATOR('+');
4779             }
4780         }
4781
4782     case '*':
4783         if (PL_expect != XOPERATOR) {
4784             s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
4785             PL_expect = XOPERATOR;
4786             force_ident(PL_tokenbuf, '*');
4787             if (!*PL_tokenbuf)
4788                 PREREF('*');
4789             TERM('*');
4790         }
4791         s++;
4792         if (*s == '*') {
4793             s++;
4794             PWop(OP_POW);
4795         }
4796         Mop(OP_MULTIPLY);
4797
4798     case '%':
4799         if (PL_expect == XOPERATOR) {
4800             ++s;
4801             Mop(OP_MODULO);
4802         }
4803         PL_tokenbuf[0] = '%';
4804         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
4805                 sizeof PL_tokenbuf - 1, FALSE);
4806         if (!PL_tokenbuf[1]) {
4807             PREREF('%');
4808         }
4809         PL_pending_ident = '%';
4810         TERM('%');
4811
4812     case '^':
4813         s++;
4814         BOop(OP_BIT_XOR);
4815     case '[':
4816         PL_lex_brackets++;
4817         {
4818             const char tmp = *s++;
4819             OPERATOR(tmp);
4820         }
4821     case '~':
4822         if (s[1] == '~'
4823             && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
4824         {
4825             s += 2;
4826             Eop(OP_SMARTMATCH);
4827         }
4828     case ',':
4829         {
4830             const char tmp = *s++;
4831             OPERATOR(tmp);
4832         }
4833     case ':':
4834         if (s[1] == ':') {
4835             len = 0;
4836             goto just_a_word_zero_gv;
4837         }
4838         s++;
4839         switch (PL_expect) {
4840             OP *attrs;
4841 #ifdef PERL_MAD
4842             I32 stuffstart;
4843 #endif
4844         case XOPERATOR:
4845             if (!PL_in_my || PL_lex_state != LEX_NORMAL)
4846                 break;
4847             PL_bufptr = s;      /* update in case we back off */
4848             if (*s == '=') {
4849                 deprecate(":= for an empty attribute list");
4850             }
4851             goto grabattrs;
4852         case XATTRBLOCK:
4853             PL_expect = XBLOCK;
4854             goto grabattrs;
4855         case XATTRTERM:
4856             PL_expect = XTERMBLOCK;
4857          grabattrs:
4858 #ifdef PERL_MAD
4859             stuffstart = s - SvPVX(PL_linestr) - 1;
4860 #endif
4861             s = PEEKSPACE(s);
4862             attrs = NULL;
4863             while (isIDFIRST_lazy_if(s,UTF)) {
4864                 I32 tmp;
4865                 SV *sv;
4866                 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
4867                 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
4868                     if (tmp < 0) tmp = -tmp;
4869                     switch (tmp) {
4870                     case KEY_or:
4871                     case KEY_and:
4872                     case KEY_for:
4873                     case KEY_foreach:
4874                     case KEY_unless:
4875                     case KEY_if:
4876                     case KEY_while:
4877                     case KEY_until:
4878                         goto got_attrs;
4879                     default:
4880                         break;
4881                     }
4882                 }
4883                 sv = newSVpvn(s, len);
4884                 if (*d == '(') {
4885                     d = scan_str(d,TRUE,TRUE);
4886                     if (!d) {
4887                         /* MUST advance bufptr here to avoid bogus
4888                            "at end of line" context messages from yyerror().
4889                          */
4890                         PL_bufptr = s + len;
4891                         yyerror("Unterminated attribute parameter in attribute list");
4892                         if (attrs)
4893                             op_free(attrs);
4894                         sv_free(sv);
4895                         return REPORT(0);       /* EOF indicator */
4896                     }
4897                 }
4898                 if (PL_lex_stuff) {
4899                     sv_catsv(sv, PL_lex_stuff);
4900                     attrs = append_elem(OP_LIST, attrs,
4901                                         newSVOP(OP_CONST, 0, sv));
4902                     SvREFCNT_dec(PL_lex_stuff);
4903                     PL_lex_stuff = NULL;
4904                 }
4905                 else {
4906                     if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
4907                         sv_free(sv);
4908                         if (PL_in_my == KEY_our) {
4909                             deprecate(":unique");
4910                         }
4911                         else
4912                             Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
4913                     }
4914
4915                     /* NOTE: any CV attrs applied here need to be part of
4916                        the CVf_BUILTIN_ATTRS define in cv.h! */
4917                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
4918                         sv_free(sv);
4919                         CvLVALUE_on(PL_compcv);
4920                     }
4921                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
4922                         sv_free(sv);
4923                         deprecate(":locked");
4924                     }
4925                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
4926                         sv_free(sv);
4927                         CvMETHOD_on(PL_compcv);
4928                     }
4929                     /* After we've set the flags, it could be argued that
4930                        we don't need to do the attributes.pm-based setting
4931                        process, and shouldn't bother appending recognized
4932                        flags.  To experiment with that, uncomment the
4933                        following "else".  (Note that's already been
4934                        uncommented.  That keeps the above-applied built-in
4935                        attributes from being intercepted (and possibly
4936                        rejected) by a package's attribute routines, but is
4937                        justified by the performance win for the common case
4938                        of applying only built-in attributes.) */
4939                     else
4940                         attrs = append_elem(OP_LIST, attrs,
4941                                             newSVOP(OP_CONST, 0,
4942                                                     sv));
4943                 }
4944                 s = PEEKSPACE(d);
4945                 if (*s == ':' && s[1] != ':')
4946                     s = PEEKSPACE(s+1);
4947                 else if (s == d)
4948                     break;      /* require real whitespace or :'s */
4949                 /* XXX losing whitespace on sequential attributes here */
4950             }
4951             {
4952                 const char tmp
4953                     = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
4954                 if (*s != ';' && *s != '}' && *s != tmp
4955                     && (tmp != '=' || *s != ')')) {
4956                     const char q = ((*s == '\'') ? '"' : '\'');
4957                     /* If here for an expression, and parsed no attrs, back
4958                        off. */
4959                     if (tmp == '=' && !attrs) {
4960                         s = PL_bufptr;
4961                         break;
4962                     }
4963                     /* MUST advance bufptr here to avoid bogus "at end of line"
4964                        context messages from yyerror().
4965                     */
4966                     PL_bufptr = s;
4967                     yyerror( (const char *)
4968                              (*s
4969                               ? Perl_form(aTHX_ "Invalid separator character "
4970                                           "%c%c%c in attribute list", q, *s, q)
4971                               : "Unterminated attribute list" ) );
4972                     if (attrs)
4973                         op_free(attrs);
4974                     OPERATOR(':');
4975                 }
4976             }
4977         got_attrs:
4978             if (attrs) {
4979                 start_force(PL_curforce);
4980                 NEXTVAL_NEXTTOKE.opval = attrs;
4981                 CURMAD('_', PL_nextwhite);
4982                 force_next(THING);
4983             }
4984 #ifdef PERL_MAD
4985             if (PL_madskills) {
4986                 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
4987                                      (s - SvPVX(PL_linestr)) - stuffstart);
4988             }
4989 #endif
4990             TOKEN(COLONATTR);
4991         }
4992         OPERATOR(':');
4993     case '(':
4994         s++;
4995         if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
4996             PL_oldbufptr = PL_oldoldbufptr;             /* allow print(STDOUT 123) */
4997         else
4998             PL_expect = XTERM;
4999         s = SKIPSPACE1(s);
5000         TOKEN('(');
5001     case ';':
5002         CLINE;
5003         {
5004             const char tmp = *s++;
5005             OPERATOR(tmp);
5006         }
5007     case ')':
5008         {
5009             const char tmp = *s++;
5010             s = SKIPSPACE1(s);
5011             if (*s == '{')
5012                 PREBLOCK(tmp);
5013             TERM(tmp);
5014         }
5015     case ']':
5016         s++;
5017         if (PL_lex_brackets <= 0)
5018             yyerror("Unmatched right square bracket");
5019         else
5020             --PL_lex_brackets;
5021         if (PL_lex_state == LEX_INTERPNORMAL) {
5022             if (PL_lex_brackets == 0) {
5023                 if (*s == '-' && s[1] == '>')
5024                     PL_lex_state = LEX_INTERPENDMAYBE;
5025                 else if (*s != '[' && *s != '{')
5026                     PL_lex_state = LEX_INTERPEND;
5027             }
5028         }
5029         TERM(']');
5030     case '{':
5031       leftbracket:
5032         s++;
5033         if (PL_lex_brackets > 100) {
5034             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5035         }
5036         switch (PL_expect) {
5037         case XTERM:
5038             if (PL_lex_formbrack) {
5039                 s--;
5040                 PRETERMBLOCK(DO);
5041             }
5042             if (PL_oldoldbufptr == PL_last_lop)
5043                 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5044             else
5045                 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5046             OPERATOR(HASHBRACK);
5047         case XOPERATOR:
5048             while (s < PL_bufend && SPACE_OR_TAB(*s))
5049                 s++;
5050             d = s;
5051             PL_tokenbuf[0] = '\0';
5052             if (d < PL_bufend && *d == '-') {
5053                 PL_tokenbuf[0] = '-';
5054                 d++;
5055                 while (d < PL_bufend && SPACE_OR_TAB(*d))
5056                     d++;
5057             }
5058             if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
5059                 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
5060                               FALSE, &len);
5061                 while (d < PL_bufend && SPACE_OR_TAB(*d))
5062                     d++;
5063                 if (*d == '}') {
5064                     const char minus = (PL_tokenbuf[0] == '-');
5065                     s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
5066                     if (minus)
5067                         force_next('-');
5068                 }
5069             }
5070             /* FALL THROUGH */
5071         case XATTRBLOCK:
5072         case XBLOCK:
5073             PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
5074             PL_expect = XSTATE;
5075             break;
5076         case XATTRTERM:
5077         case XTERMBLOCK:
5078             PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5079             PL_expect = XSTATE;
5080             break;
5081         default: {
5082                 const char *t;
5083                 if (PL_oldoldbufptr == PL_last_lop)
5084                     PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5085                 else
5086                     PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5087                 s = SKIPSPACE1(s);
5088                 if (*s == '}') {
5089                     if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5090                         PL_expect = XTERM;
5091                         /* This hack is to get the ${} in the message. */
5092                         PL_bufptr = s+1;
5093                         yyerror("syntax error");
5094                         break;
5095                     }
5096                     OPERATOR(HASHBRACK);
5097                 }
5098                 /* This hack serves to disambiguate a pair of curlies
5099                  * as being a block or an anon hash.  Normally, expectation
5100                  * determines that, but in cases where we're not in a
5101                  * position to expect anything in particular (like inside
5102                  * eval"") we have to resolve the ambiguity.  This code
5103                  * covers the case where the first term in the curlies is a
5104                  * quoted string.  Most other cases need to be explicitly
5105                  * disambiguated by prepending a "+" before the opening
5106                  * curly in order to force resolution as an anon hash.
5107                  *
5108                  * XXX should probably propagate the outer expectation
5109                  * into eval"" to rely less on this hack, but that could
5110                  * potentially break current behavior of eval"".
5111                  * GSAR 97-07-21
5112                  */
5113                 t = s;
5114                 if (*s == '\'' || *s == '"' || *s == '`') {
5115                     /* common case: get past first string, handling escapes */
5116                     for (t++; t < PL_bufend && *t != *s;)
5117                         if (*t++ == '\\' && (*t == '\\' || *t == *s))
5118                             t++;
5119                     t++;
5120                 }
5121                 else if (*s == 'q') {
5122                     if (++t < PL_bufend
5123                         && (!isALNUM(*t)
5124                             || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
5125                                 && !isALNUM(*t))))
5126                     {
5127                         /* skip q//-like construct */
5128                         const char *tmps;
5129                         char open, close, term;
5130                         I32 brackets = 1;
5131
5132                         while (t < PL_bufend && isSPACE(*t))
5133                             t++;
5134                         /* check for q => */
5135                         if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5136                             OPERATOR(HASHBRACK);
5137                         }
5138                         term = *t;
5139                         open = term;
5140                         if (term && (tmps = strchr("([{< )]}> )]}>",term)))
5141                             term = tmps[5];
5142                         close = term;
5143                         if (open == close)
5144                             for (t++; t < PL_bufend; t++) {
5145                                 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
5146                                     t++;
5147                                 else if (*t == open)
5148                                     break;
5149                             }
5150                         else {
5151                             for (t++; t < PL_bufend; t++) {
5152                                 if (*t == '\\' && t+1 < PL_bufend)
5153                                     t++;
5154                                 else if (*t == close && --brackets <= 0)
5155                                     break;
5156                                 else if (*t == open)
5157                                     brackets++;
5158                             }
5159                         }
5160                         t++;
5161                     }
5162                     else
5163                         /* skip plain q word */
5164                         while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5165                              t += UTF8SKIP(t);
5166                 }
5167                 else if (isALNUM_lazy_if(t,UTF)) {
5168                     t += UTF8SKIP(t);
5169                     while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5170                          t += UTF8SKIP(t);
5171                 }
5172                 while (t < PL_bufend && isSPACE(*t))
5173                     t++;
5174                 /* if comma follows first term, call it an anon hash */
5175                 /* XXX it could be a comma expression with loop modifiers */
5176                 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
5177                                    || (*t == '=' && t[1] == '>')))
5178                     OPERATOR(HASHBRACK);
5179                 if (PL_expect == XREF)
5180                     PL_expect = XTERM;
5181                 else {
5182                     PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
5183                     PL_expect = XSTATE;
5184                 }
5185             }
5186             break;
5187         }
5188         pl_yylval.ival = CopLINE(PL_curcop);
5189         if (isSPACE(*s) || *s == '#')
5190             PL_copline = NOLINE;   /* invalidate current command line number */
5191         TOKEN('{');
5192     case '}':
5193       rightbracket:
5194         s++;
5195         if (PL_lex_brackets <= 0)
5196             yyerror("Unmatched right curly bracket");
5197         else
5198             PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
5199         if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
5200             PL_lex_formbrack = 0;
5201         if (PL_lex_state == LEX_INTERPNORMAL) {
5202             if (PL_lex_brackets == 0) {
5203                 if (PL_expect & XFAKEBRACK) {
5204                     PL_expect &= XENUMMASK;
5205                     PL_lex_state = LEX_INTERPEND;
5206                     PL_bufptr = s;
5207 #if 0
5208                     if (PL_madskills) {
5209                         if (!PL_thiswhite)
5210                             PL_thiswhite = newSVpvs("");
5211                         sv_catpvs(PL_thiswhite,"}");
5212                     }
5213 #endif
5214                     return yylex();     /* ignore fake brackets */
5215                 }
5216                 if (*s == '-' && s[1] == '>')
5217                     PL_lex_state = LEX_INTERPENDMAYBE;
5218                 else if (*s != '[' && *s != '{')
5219                     PL_lex_state = LEX_INTERPEND;
5220             }
5221         }
5222         if (PL_expect & XFAKEBRACK) {
5223             PL_expect &= XENUMMASK;
5224             PL_bufptr = s;
5225             return yylex();             /* ignore fake brackets */
5226         }
5227         start_force(PL_curforce);
5228         if (PL_madskills) {
5229             curmad('X', newSVpvn(s-1,1));
5230             CURMAD('_', PL_thiswhite);
5231         }
5232         force_next('}');
5233 #ifdef PERL_MAD
5234         if (!PL_thistoken)
5235             PL_thistoken = newSVpvs("");
5236 #endif
5237         TOKEN(';');
5238     case '&':
5239         s++;
5240         if (*s++ == '&')
5241             AOPERATOR(ANDAND);
5242         s--;
5243         if (PL_expect == XOPERATOR) {
5244             if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
5245                 && isIDFIRST_lazy_if(s,UTF))
5246             {
5247                 CopLINE_dec(PL_curcop);
5248                 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
5249                 CopLINE_inc(PL_curcop);
5250             }
5251             BAop(OP_BIT_AND);
5252         }
5253
5254         s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5255         if (*PL_tokenbuf) {
5256             PL_expect = XOPERATOR;
5257             force_ident(PL_tokenbuf, '&');
5258         }
5259         else
5260             PREREF('&');
5261         pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
5262         TERM('&');
5263
5264     case '|':
5265         s++;
5266         if (*s++ == '|')
5267             AOPERATOR(OROR);
5268         s--;
5269         BOop(OP_BIT_OR);
5270     case '=':
5271         s++;
5272         {
5273             const char tmp = *s++;
5274             if (tmp == '=')
5275                 Eop(OP_EQ);
5276             if (tmp == '>')
5277                 OPERATOR(',');
5278             if (tmp == '~')
5279                 PMop(OP_MATCH);
5280             if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
5281                 && strchr("+-*/%.^&|<",tmp))
5282                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5283                             "Reversed %c= operator",(int)tmp);
5284             s--;
5285             if (PL_expect == XSTATE && isALPHA(tmp) &&
5286                 (s == PL_linestart+1 || s[-2] == '\n') )
5287                 {
5288                     if (PL_in_eval && !PL_rsfp) {
5289                         d = PL_bufend;
5290                         while (s < d) {
5291                             if (*s++ == '\n') {
5292                                 incline(s);
5293                                 if (strnEQ(s,"=cut",4)) {
5294                                     s = strchr(s,'\n');
5295                                     if (s)
5296                                         s++;
5297                                     else
5298                                         s = d;
5299                                     incline(s);
5300                                     goto retry;
5301                                 }
5302                             }
5303                         }
5304                         goto retry;
5305                     }
5306 #ifdef PERL_MAD
5307                     if (PL_madskills) {
5308                         if (!PL_thiswhite)
5309                             PL_thiswhite = newSVpvs("");
5310                         sv_catpvn(PL_thiswhite, PL_linestart,
5311                                   PL_bufend - PL_linestart);
5312                     }
5313 #endif
5314                     s = PL_bufend;
5315                     PL_doextract = TRUE;
5316                     goto retry;
5317                 }
5318         }
5319         if (PL_lex_brackets < PL_lex_formbrack) {
5320             const char *t = s;
5321 #ifdef PERL_STRICT_CR
5322             while (SPACE_OR_TAB(*t))
5323 #else
5324             while (SPACE_OR_TAB(*t) || *t == '\r')
5325 #endif
5326                 t++;
5327             if (*t == '\n' || *t == '#') {
5328                 s--;
5329                 PL_expect = XBLOCK;
5330                 goto leftbracket;
5331             }
5332         }
5333         pl_yylval.ival = 0;
5334         OPERATOR(ASSIGNOP);
5335     case '!':
5336         s++;
5337         {
5338             const char tmp = *s++;
5339             if (tmp == '=') {
5340                 /* was this !=~ where !~ was meant?
5341                  * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
5342
5343                 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
5344                     const char *t = s+1;
5345
5346                     while (t < PL_bufend && isSPACE(*t))
5347                         ++t;
5348
5349                     if (*t == '/' || *t == '?' ||
5350                         ((*t == 'm' || *t == 's' || *t == 'y')
5351                          && !isALNUM(t[1])) ||
5352                         (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
5353                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5354                                     "!=~ should be !~");
5355                 }
5356                 Eop(OP_NE);
5357             }
5358             if (tmp == '~')
5359                 PMop(OP_NOT);
5360         }
5361         s--;
5362         OPERATOR('!');
5363     case '<':
5364         if (PL_expect != XOPERATOR) {
5365             if (s[1] != '<' && !strchr(s,'>'))
5366                 check_uni();
5367             if (s[1] == '<')
5368                 s = scan_heredoc(s);
5369             else
5370                 s = scan_inputsymbol(s);
5371             TERM(sublex_start());
5372         }
5373         s++;
5374         {
5375             char tmp = *s++;
5376             if (tmp == '<')
5377                 SHop(OP_LEFT_SHIFT);
5378             if (tmp == '=') {
5379                 tmp = *s++;
5380                 if (tmp == '>')
5381                     Eop(OP_NCMP);
5382                 s--;
5383                 Rop(OP_LE);
5384             }
5385         }
5386         s--;
5387         Rop(OP_LT);
5388     case '>':
5389         s++;
5390         {
5391             const char tmp = *s++;
5392             if (tmp == '>')
5393                 SHop(OP_RIGHT_SHIFT);
5394             else if (tmp == '=')
5395                 Rop(OP_GE);
5396         }
5397         s--;
5398         Rop(OP_GT);
5399
5400     case '$':
5401         CLINE;
5402
5403         if (PL_expect == XOPERATOR) {
5404             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5405                 return deprecate_commaless_var_list();
5406             }
5407         }
5408
5409         if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
5410             PL_tokenbuf[0] = '@';
5411             s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
5412                            sizeof PL_tokenbuf - 1, FALSE);
5413             if (PL_expect == XOPERATOR)
5414                 no_op("Array length", s);
5415             if (!PL_tokenbuf[1])
5416                 PREREF(DOLSHARP);
5417             PL_expect = XOPERATOR;
5418             PL_pending_ident = '#';
5419             TOKEN(DOLSHARP);
5420         }
5421
5422         PL_tokenbuf[0] = '$';
5423         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5424                        sizeof PL_tokenbuf - 1, FALSE);
5425         if (PL_expect == XOPERATOR)
5426             no_op("Scalar", s);
5427         if (!PL_tokenbuf[1]) {
5428             if (s == PL_bufend)
5429                 yyerror("Final $ should be \\$ or $name");
5430             PREREF('$');
5431         }
5432
5433         /* This kludge not intended to be bulletproof. */
5434         if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
5435             pl_yylval.opval = newSVOP(OP_CONST, 0,
5436                                    newSViv(CopARYBASE_get(&PL_compiling)));
5437             pl_yylval.opval->op_private = OPpCONST_ARYBASE;
5438             TERM(THING);
5439         }
5440
5441         d = s;
5442         {
5443             const char tmp = *s;
5444             if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
5445                 s = SKIPSPACE1(s);
5446
5447             if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
5448                 && intuit_more(s)) {
5449                 if (*s == '[') {
5450                     PL_tokenbuf[0] = '@';
5451                     if (ckWARN(WARN_SYNTAX)) {
5452                         char *t = s+1;
5453
5454                         while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
5455                             t++;
5456                         if (*t++ == ',') {
5457                             PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
5458                             while (t < PL_bufend && *t != ']')
5459                                 t++;
5460                             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5461                                         "Multidimensional syntax %.*s not supported",
5462                                     (int)((t - PL_bufptr) + 1), PL_bufptr);
5463                         }
5464                     }
5465                 }
5466                 else if (*s == '{') {
5467                     char *t;
5468                     PL_tokenbuf[0] = '%';
5469                     if (strEQ(PL_tokenbuf+1, "SIG")  && ckWARN(WARN_SYNTAX)
5470                         && (t = strchr(s, '}')) && (t = strchr(t, '=')))
5471                         {
5472                             char tmpbuf[sizeof PL_tokenbuf];
5473                             do {
5474                                 t++;
5475                             } while (isSPACE(*t));
5476                             if (isIDFIRST_lazy_if(t,UTF)) {
5477                                 STRLEN len;
5478                                 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
5479                                               &len);
5480                                 while (isSPACE(*t))
5481                                     t++;
5482                                 if (*t == ';' && get_cvn_flags(tmpbuf, len, 0))
5483                                     Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5484                                                 "You need to quote \"%s\"",
5485                                                 tmpbuf);
5486                             }
5487                         }
5488                 }
5489             }
5490
5491             PL_expect = XOPERATOR;
5492             if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
5493                 const bool islop = (PL_last_lop == PL_oldoldbufptr);
5494                 if (!islop || PL_last_lop_op == OP_GREPSTART)
5495                     PL_expect = XOPERATOR;
5496                 else if (strchr("$@\"'`q", *s))
5497                     PL_expect = XTERM;          /* e.g. print $fh "foo" */
5498                 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
5499                     PL_expect = XTERM;          /* e.g. print $fh &sub */
5500                 else if (isIDFIRST_lazy_if(s,UTF)) {
5501                     char tmpbuf[sizeof PL_tokenbuf];
5502                     int t2;
5503                     scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5504                     if ((t2 = keyword(tmpbuf, len, 0))) {
5505                         /* binary operators exclude handle interpretations */
5506                         switch (t2) {
5507                         case -KEY_x:
5508                         case -KEY_eq:
5509                         case -KEY_ne:
5510                         case -KEY_gt:
5511                         case -KEY_lt:
5512                         case -KEY_ge:
5513                         case -KEY_le:
5514                         case -KEY_cmp:
5515                             break;
5516                         default:
5517                             PL_expect = XTERM;  /* e.g. print $fh length() */
5518                             break;
5519                         }
5520                     }
5521                     else {
5522                         PL_expect = XTERM;      /* e.g. print $fh subr() */
5523                     }
5524                 }
5525                 else if (isDIGIT(*s))
5526                     PL_expect = XTERM;          /* e.g. print $fh 3 */
5527                 else if (*s == '.' && isDIGIT(s[1]))
5528                     PL_expect = XTERM;          /* e.g. print $fh .3 */
5529                 else if ((*s == '?' || *s == '-' || *s == '+')
5530                          && !isSPACE(s[1]) && s[1] != '=')
5531                     PL_expect = XTERM;          /* e.g. print $fh -1 */
5532                 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
5533                          && s[1] != '/')
5534                     PL_expect = XTERM;          /* e.g. print $fh /.../
5535                                                    XXX except DORDOR operator
5536                                                 */
5537                 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
5538                          && s[2] != '=')
5539                     PL_expect = XTERM;          /* print $fh <<"EOF" */
5540             }
5541         }
5542         PL_pending_ident = '$';
5543         TOKEN('$');
5544
5545     case '@':
5546         if (PL_expect == XOPERATOR)
5547             no_op("Array", s);
5548         PL_tokenbuf[0] = '@';
5549         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
5550         if (!PL_tokenbuf[1]) {
5551             PREREF('@');
5552         }
5553         if (PL_lex_state == LEX_NORMAL)
5554             s = SKIPSPACE1(s);
5555         if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
5556             if (*s == '{')
5557                 PL_tokenbuf[0] = '%';
5558
5559             /* Warn about @ where they meant $. */
5560             if (*s == '[' || *s == '{') {
5561                 if (ckWARN(WARN_SYNTAX)) {
5562                     const char *t = s + 1;
5563                     while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
5564                         t++;
5565                     if (*t == '}' || *t == ']') {
5566                         t++;
5567                         PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
5568                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5569                             "Scalar value %.*s better written as $%.*s",
5570                             (int)(t-PL_bufptr), PL_bufptr,
5571                             (int)(t-PL_bufptr-1), PL_bufptr+1);
5572                     }
5573                 }
5574             }
5575         }
5576         PL_pending_ident = '@';
5577         TERM('@');
5578
5579      case '/':                  /* may be division, defined-or, or pattern */
5580         if (PL_expect == XTERMORDORDOR && s[1] == '/') {
5581             s += 2;
5582             AOPERATOR(DORDOR);
5583         }
5584      case '?':                  /* may either be conditional or pattern */
5585         if (PL_expect == XOPERATOR) {
5586              char tmp = *s++;
5587              if(tmp == '?') {
5588                 OPERATOR('?');
5589              }
5590              else {
5591                  tmp = *s++;
5592                  if(tmp == '/') {
5593                      /* A // operator. */
5594                     AOPERATOR(DORDOR);
5595                  }
5596                  else {
5597                      s--;
5598                      Mop(OP_DIVIDE);
5599                  }
5600              }
5601          }
5602          else {
5603              /* Disable warning on "study /blah/" */
5604              if (PL_oldoldbufptr == PL_last_uni
5605               && (*PL_last_uni != 's' || s - PL_last_uni < 5
5606                   || memNE(PL_last_uni, "study", 5)
5607                   || isALNUM_lazy_if(PL_last_uni+5,UTF)
5608               ))
5609                  check_uni();
5610              s = scan_pat(s,OP_MATCH);
5611              TERM(sublex_start());
5612          }
5613
5614     case '.':
5615         if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
5616 #ifdef PERL_STRICT_CR
5617             && s[1] == '\n'
5618 #else
5619             && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
5620 #endif
5621             && (s == PL_linestart || s[-1] == '\n') )
5622         {
5623             PL_lex_formbrack = 0;
5624             PL_expect = XSTATE;
5625             goto rightbracket;
5626         }
5627         if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
5628             s += 3;
5629             OPERATOR(YADAYADA);
5630         }
5631         if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
5632             char tmp = *s++;
5633             if (*s == tmp) {
5634                 s++;
5635                 if (*s == tmp) {
5636                     s++;
5637                     pl_yylval.ival = OPf_SPECIAL;
5638                 }
5639                 else
5640                     pl_yylval.ival = 0;
5641                 OPERATOR(DOTDOT);
5642             }
5643             if (PL_expect != XOPERATOR)
5644                 check_uni();
5645             Aop(OP_CONCAT);
5646         }
5647         /* FALL THROUGH */
5648     case '0': case '1': case '2': case '3': case '4':
5649     case '5': case '6': case '7': case '8': case '9':
5650         s = scan_num(s, &pl_yylval);
5651         DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
5652         if (PL_expect == XOPERATOR)
5653             no_op("Number",s);
5654         TERM(THING);
5655
5656     case '\'':
5657         s = scan_str(s,!!PL_madskills,FALSE);
5658         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
5659         if (PL_expect == XOPERATOR) {
5660             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5661                 return deprecate_commaless_var_list();
5662             }
5663             else
5664                 no_op("String",s);
5665         }
5666         if (!s)
5667             missingterm(NULL);
5668         pl_yylval.ival = OP_CONST;
5669         TERM(sublex_start());
5670
5671     case '"':
5672         s = scan_str(s,!!PL_madskills,FALSE);
5673         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
5674         if (PL_expect == XOPERATOR) {
5675             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5676                 return deprecate_commaless_var_list();
5677             }
5678             else
5679                 no_op("String",s);
5680         }
5681         if (!s)
5682             missingterm(NULL);
5683         pl_yylval.ival = OP_CONST;
5684         /* FIXME. I think that this can be const if char *d is replaced by
5685            more localised variables.  */
5686         for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
5687             if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
5688                 pl_yylval.ival = OP_STRINGIFY;
5689                 break;
5690             }
5691         }
5692         TERM(sublex_start());
5693
5694     case '`':
5695         s = scan_str(s,!!PL_madskills,FALSE);
5696         DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
5697         if (PL_expect == XOPERATOR)
5698             no_op("Backticks",s);
5699         if (!s)
5700             missingterm(NULL);
5701         readpipe_override();
5702         TERM(sublex_start());
5703
5704     case '\\':
5705         s++;
5706         if (PL_lex_inwhat && isDIGIT(*s))
5707             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
5708                            *s, *s);
5709         if (PL_expect == XOPERATOR)
5710             no_op("Backslash",s);
5711         OPERATOR(REFGEN);
5712
5713     case 'v':
5714         if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
5715             char *start = s + 2;
5716             while (isDIGIT(*start) || *start == '_')
5717                 start++;
5718             if (*start == '.' && isDIGIT(start[1])) {
5719                 s = scan_num(s, &pl_yylval);
5720                 TERM(THING);
5721             }
5722             /* avoid v123abc() or $h{v1}, allow C<print v10;> */
5723             else if (!isALPHA(*start) && (PL_expect == XTERM
5724                         || PL_expect == XREF || PL_expect == XSTATE
5725                         || PL_expect == XTERMORDORDOR)) {
5726                 GV *const gv = gv_fetchpvn_flags(s, start - s, 0, SVt_PVCV);
5727                 if (!gv) {
5728                     s = scan_num(s, &pl_yylval);
5729                     TERM(THING);
5730                 }
5731             }
5732         }
5733         goto keylookup;
5734     case 'x':
5735         if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
5736             s++;
5737             Mop(OP_REPEAT);
5738         }
5739         goto keylookup;
5740
5741     case '_':
5742     case 'a': case 'A':
5743     case 'b': case 'B':
5744     case 'c': case 'C':
5745     case 'd': case 'D':
5746     case 'e': case 'E':
5747     case 'f': case 'F':
5748     case 'g': case 'G':
5749     case 'h': case 'H':
5750     case 'i': case 'I':
5751     case 'j': case 'J':
5752     case 'k': case 'K':
5753     case 'l': case 'L':
5754     case 'm': case 'M':
5755     case 'n': case 'N':
5756     case 'o': case 'O':
5757     case 'p': case 'P':
5758     case 'q': case 'Q':
5759     case 'r': case 'R':
5760     case 's': case 'S':
5761     case 't': case 'T':
5762     case 'u': case 'U':
5763               case 'V':
5764     case 'w': case 'W':
5765               case 'X':
5766     case 'y': case 'Y':
5767     case 'z': case 'Z':
5768
5769       keylookup: {
5770         bool anydelim;
5771         I32 tmp;
5772
5773         orig_keyword = 0;
5774         gv = NULL;
5775         gvp = NULL;
5776
5777         PL_bufptr = s;
5778         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5779
5780         /* Some keywords can be followed by any delimiter, including ':' */
5781         anydelim = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) ||
5782                (len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
5783                              (PL_tokenbuf[0] == 'q' &&
5784                               strchr("qwxr", PL_tokenbuf[1])))));
5785
5786         /* x::* is just a word, unless x is "CORE" */
5787         if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
5788             goto just_a_word;
5789
5790         d = s;
5791         while (d < PL_bufend && isSPACE(*d))
5792                 d++;    /* no comments skipped here, or s### is misparsed */
5793
5794         /* Is this a word before a => operator? */
5795         if (*d == '=' && d[1] == '>') {
5796             CLINE;
5797             pl_yylval.opval
5798                 = (OP*)newSVOP(OP_CONST, 0,
5799                                S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
5800             pl_yylval.opval->op_private = OPpCONST_BARE;
5801             TERM(WORD);
5802         }
5803
5804         /* Check for plugged-in keyword */
5805         {
5806             OP *o;
5807             int result;
5808             char *saved_bufptr = PL_bufptr;
5809             PL_bufptr = s;
5810             result = CALL_FPTR(PL_keyword_plugin)(aTHX_ PL_tokenbuf, len, &o);
5811             s = PL_bufptr;
5812             if (result == KEYWORD_PLUGIN_DECLINE) {
5813                 /* not a plugged-in keyword */
5814                 PL_bufptr = saved_bufptr;
5815             } else if (result == KEYWORD_PLUGIN_STMT) {
5816                 pl_yylval.opval = o;
5817                 CLINE;
5818                 PL_expect = XSTATE;
5819                 return REPORT(PLUGSTMT);
5820             } else if (result == KEYWORD_PLUGIN_EXPR) {
5821                 pl_yylval.opval = o;
5822                 CLINE;
5823                 PL_expect = XOPERATOR;
5824                 return REPORT(PLUGEXPR);
5825             } else {
5826                 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
5827                                         PL_tokenbuf);
5828             }
5829         }
5830
5831         /* Check for built-in keyword */
5832         tmp = keyword(PL_tokenbuf, len, 0);
5833
5834         /* Is this a label? */
5835         if (!anydelim && PL_expect == XSTATE
5836               && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
5837             if (tmp)
5838                 Perl_croak(aTHX_ "Can't use keyword '%s' as a label", PL_tokenbuf);
5839             s = d + 1;
5840             pl_yylval.pval = CopLABEL_alloc(PL_tokenbuf);
5841             CLINE;
5842             TOKEN(LABEL);
5843         }
5844
5845         if (tmp < 0) {                  /* second-class keyword? */
5846             GV *ogv = NULL;     /* override (winner) */
5847             GV *hgv = NULL;     /* hidden (loser) */
5848             if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
5849                 CV *cv;
5850                 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVCV)) &&
5851                     (cv = GvCVu(gv)))
5852                 {
5853                     if (GvIMPORTED_CV(gv))
5854                         ogv = gv;
5855                     else if (! CvMETHOD(cv))
5856                         hgv = gv;
5857                 }
5858                 if (!ogv &&
5859                     (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
5860                     (gv = *gvp) && isGV_with_GP(gv) &&
5861                     GvCVu(gv) && GvIMPORTED_CV(gv))
5862                 {
5863                     ogv = gv;
5864                 }
5865             }
5866             if (ogv) {
5867                 orig_keyword = tmp;
5868                 tmp = 0;                /* overridden by import or by GLOBAL */
5869             }
5870             else if (gv && !gvp
5871                      && -tmp==KEY_lock  /* XXX generalizable kludge */
5872                      && GvCVu(gv))
5873             {
5874                 tmp = 0;                /* any sub overrides "weak" keyword */
5875             }
5876             else {                      /* no override */
5877                 tmp = -tmp;
5878                 if (tmp == KEY_dump) {
5879                     Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
5880                                    "dump() better written as CORE::dump()");
5881                 }
5882                 gv = NULL;
5883                 gvp = 0;
5884                 if (hgv && tmp != KEY_x && tmp != KEY_CORE)     /* never ambiguous */
5885                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5886                                    "Ambiguous call resolved as CORE::%s(), %s",
5887                                    GvENAME(hgv), "qualify as such or use &");
5888             }
5889         }
5890
5891       reserved_word:
5892         switch (tmp) {
5893
5894         default:                        /* not a keyword */
5895             /* Trade off - by using this evil construction we can pull the
5896                variable gv into the block labelled keylookup. If not, then
5897                we have to give it function scope so that the goto from the
5898                earlier ':' case doesn't bypass the initialisation.  */
5899             if (0) {
5900             just_a_word_zero_gv:
5901                 gv = NULL;
5902                 gvp = NULL;
5903                 orig_keyword = 0;
5904             }
5905           just_a_word: {
5906                 SV *sv;
5907                 int pkgname = 0;
5908                 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
5909                 OP *rv2cv_op;
5910                 CV *cv;
5911 #ifdef PERL_MAD
5912                 SV *nextPL_nextwhite = 0;
5913 #endif
5914
5915
5916                 /* Get the rest if it looks like a package qualifier */
5917
5918                 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
5919                     STRLEN morelen;
5920                     s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
5921                                   TRUE, &morelen);
5922                     if (!morelen)
5923                         Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
5924                                 *s == '\'' ? "'" : "::");
5925                     len += morelen;
5926                     pkgname = 1;
5927                 }
5928
5929                 if (PL_expect == XOPERATOR) {
5930                     if (PL_bufptr == PL_linestart) {
5931                         CopLINE_dec(PL_curcop);
5932                         Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
5933                         CopLINE_inc(PL_curcop);
5934                     }
5935                     else
5936                         no_op("Bareword",s);
5937                 }
5938
5939                 /* Look for a subroutine with this name in current package,
5940                    unless name is "Foo::", in which case Foo is a bearword
5941                    (and a package name). */
5942
5943                 if (len > 2 && !PL_madskills &&
5944                     PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
5945                 {
5946                     if (ckWARN(WARN_BAREWORD)
5947                         && ! gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVHV))
5948                         Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
5949                             "Bareword \"%s\" refers to nonexistent package",
5950                              PL_tokenbuf);
5951                     len -= 2;
5952                     PL_tokenbuf[len] = '\0';
5953                     gv = NULL;
5954                     gvp = 0;
5955                 }
5956                 else {
5957                     if (!gv) {
5958                         /* Mustn't actually add anything to a symbol table.
5959                            But also don't want to "initialise" any placeholder
5960                            constants that might already be there into full
5961                            blown PVGVs with attached PVCV.  */
5962                         gv = gv_fetchpvn_flags(PL_tokenbuf, len,
5963                                                GV_NOADD_NOINIT, SVt_PVCV);
5964                     }
5965                     len = 0;
5966                 }
5967
5968                 /* if we saw a global override before, get the right name */
5969
5970                 if (gvp) {
5971                     sv = newSVpvs("CORE::GLOBAL::");
5972                     sv_catpv(sv,PL_tokenbuf);
5973                 }
5974                 else {
5975                     /* If len is 0, newSVpv does strlen(), which is correct.
5976                        If len is non-zero, then it will be the true length,
5977                        and so the scalar will be created correctly.  */
5978                     sv = newSVpv(PL_tokenbuf,len);
5979                 }
5980 #ifdef PERL_MAD
5981                 if (PL_madskills && !PL_thistoken) {
5982                     char *start = SvPVX(PL_linestr) + PL_realtokenstart;
5983                     PL_thistoken = newSVpvn(start,s - start);
5984                     PL_realtokenstart = s - SvPVX(PL_linestr);
5985                 }
5986 #endif
5987
5988                 /* Presume this is going to be a bareword of some sort. */
5989
5990                 CLINE;
5991                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
5992                 pl_yylval.opval->op_private = OPpCONST_BARE;
5993                 /* UTF-8 package name? */
5994                 if (UTF && !IN_BYTES &&
5995                     is_utf8_string((U8*)SvPVX_const(sv), SvCUR(sv)))
5996                     SvUTF8_on(sv);
5997
5998                 /* And if "Foo::", then that's what it certainly is. */
5999
6000                 if (len)
6001                     goto safe_bareword;
6002
6003                 cv = NULL;
6004                 {
6005                     OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc(sv));
6006                     const_op->op_private = OPpCONST_BARE;
6007                     rv2cv_op = newCVREF(0, const_op);
6008                 }
6009                 if (rv2cv_op->op_type == OP_RV2CV &&
6010                         (rv2cv_op->op_flags & OPf_KIDS)) {
6011                     OP *rv_op = cUNOPx(rv2cv_op)->op_first;
6012                     switch (rv_op->op_type) {
6013                         case OP_CONST: {
6014                             SV *sv = cSVOPx_sv(rv_op);
6015                             if (SvROK(sv) && SvTYPE(SvRV(sv)) == SVt_PVCV)
6016                                 cv = (CV*)SvRV(sv);
6017                         } break;
6018                         case OP_GV: {
6019                             GV *gv = cGVOPx_gv(rv_op);
6020                             CV *maybe_cv = GvCVu(gv);
6021                             if (maybe_cv && SvTYPE((SV*)maybe_cv) == SVt_PVCV)
6022                                 cv = maybe_cv;
6023                         } break;
6024                     }
6025                 }
6026
6027                 /* See if it's the indirect object for a list operator. */
6028
6029                 if (PL_oldoldbufptr &&
6030                     PL_oldoldbufptr < PL_bufptr &&
6031                     (PL_oldoldbufptr == PL_last_lop
6032                      || PL_oldoldbufptr == PL_last_uni) &&
6033                     /* NO SKIPSPACE BEFORE HERE! */
6034                     (PL_expect == XREF ||
6035                      ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
6036                 {
6037                     bool immediate_paren = *s == '(';
6038
6039                     /* (Now we can afford to cross potential line boundary.) */
6040                     s = SKIPSPACE2(s,nextPL_nextwhite);
6041 #ifdef PERL_MAD
6042                     PL_nextwhite = nextPL_nextwhite;    /* assume no & deception */
6043 #endif
6044
6045                     /* Two barewords in a row may indicate method call. */
6046
6047                     if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
6048                         (tmp = intuit_method(s, gv, cv))) {
6049                         op_free(rv2cv_op);
6050                         return REPORT(tmp);
6051                     }
6052
6053                     /* If not a declared subroutine, it's an indirect object. */
6054                     /* (But it's an indir obj regardless for sort.) */
6055                     /* Also, if "_" follows a filetest operator, it's a bareword */
6056
6057                     if (
6058                         ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
6059                          (!cv &&
6060                         (PL_last_lop_op != OP_MAPSTART &&
6061                          PL_last_lop_op != OP_GREPSTART))))
6062                        || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
6063                             && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
6064                        )
6065                     {
6066                         PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
6067                         goto bareword;
6068                     }
6069                 }
6070
6071                 PL_expect = XOPERATOR;
6072 #ifdef PERL_MAD
6073                 if (isSPACE(*s))
6074                     s = SKIPSPACE2(s,nextPL_nextwhite);
6075                 PL_nextwhite = nextPL_nextwhite;
6076 #else
6077                 s = skipspace(s);
6078 #endif
6079
6080                 /* Is this a word before a => operator? */
6081                 if (*s == '=' && s[1] == '>' && !pkgname) {
6082                     op_free(rv2cv_op);
6083                     CLINE;
6084                     sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
6085                     if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
6086                       SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
6087                     TERM(WORD);
6088                 }
6089
6090                 /* If followed by a paren, it's certainly a subroutine. */
6091                 if (*s == '(') {
6092                     CLINE;
6093                     if (cv) {
6094                         d = s + 1;
6095                         while (SPACE_OR_TAB(*d))
6096                             d++;
6097                         if (*d == ')' && (sv = cv_const_sv(cv))) {
6098                             s = d + 1;
6099                             goto its_constant;
6100                         }
6101                     }
6102 #ifdef PERL_MAD
6103                     if (PL_madskills) {
6104                         PL_nextwhite = PL_thiswhite;
6105                         PL_thiswhite = 0;
6106                     }
6107                     start_force(PL_curforce);
6108 #endif
6109                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6110                     PL_expect = XOPERATOR;
6111 #ifdef PERL_MAD
6112                     if (PL_madskills) {
6113                         PL_nextwhite = nextPL_nextwhite;
6114                         curmad('X', PL_thistoken);
6115                         PL_thistoken = newSVpvs("");
6116                     }
6117 #endif
6118                     op_free(rv2cv_op);
6119                     force_next(WORD);
6120                     pl_yylval.ival = 0;
6121                     TOKEN('&');
6122                 }
6123
6124                 /* If followed by var or block, call it a method (unless sub) */
6125
6126                 if ((*s == '$' || *s == '{') && !cv) {
6127                     op_free(rv2cv_op);
6128                     PL_last_lop = PL_oldbufptr;
6129                     PL_last_lop_op = OP_METHOD;
6130                     PREBLOCK(METHOD);
6131                 }
6132
6133                 /* If followed by a bareword, see if it looks like indir obj. */
6134
6135                 if (!orig_keyword
6136                         && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
6137                         && (tmp = intuit_method(s, gv, cv))) {
6138                     op_free(rv2cv_op);
6139                     return REPORT(tmp);
6140                 }
6141
6142                 /* Not a method, so call it a subroutine (if defined) */
6143
6144                 if (cv) {
6145                     if (lastchar == '-')
6146                         Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6147                                          "Ambiguous use of -%s resolved as -&%s()",
6148                                          PL_tokenbuf, PL_tokenbuf);
6149                     /* Check for a constant sub */
6150                     if ((sv = cv_const_sv(cv))) {
6151                   its_constant:
6152                         op_free(rv2cv_op);
6153                         SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
6154                         ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
6155                         pl_yylval.opval->op_private = 0;
6156                         TOKEN(WORD);
6157                     }
6158
6159                     op_free(pl_yylval.opval);
6160                     pl_yylval.opval = rv2cv_op;
6161                     pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
6162                     PL_last_lop = PL_oldbufptr;
6163                     PL_last_lop_op = OP_ENTERSUB;
6164                     /* Is there a prototype? */
6165                     if (
6166 #ifdef PERL_MAD
6167                         cv &&
6168 #endif
6169                         SvPOK(cv))
6170                     {
6171                         STRLEN protolen;
6172                         const char *proto = SvPV_const(MUTABLE_SV(cv), protolen);
6173                         if (!protolen)
6174                             TERM(FUNC0SUB);
6175                         if ((*proto == '$' || *proto == '_') && proto[1] == '\0')
6176                             OPERATOR(UNIOPSUB);
6177                         while (*proto == ';')
6178                             proto++;
6179                         if (*proto == '&' && *s == '{') {
6180                             if (PL_curstash)
6181                                 sv_setpvs(PL_subname, "__ANON__");
6182                             else
6183                                 sv_setpvs(PL_subname, "__ANON__::__ANON__");
6184                             PREBLOCK(LSTOPSUB);
6185                         }
6186                     }
6187 #ifdef PERL_MAD
6188                     {
6189                         if (PL_madskills) {
6190                             PL_nextwhite = PL_thiswhite;
6191                             PL_thiswhite = 0;
6192                         }
6193                         start_force(PL_curforce);
6194                         NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6195                         PL_expect = XTERM;
6196                         if (PL_madskills) {
6197                             PL_nextwhite = nextPL_nextwhite;
6198                             curmad('X', PL_thistoken);
6199                             PL_thistoken = newSVpvs("");
6200                         }
6201                         force_next(WORD);
6202                         TOKEN(NOAMP);
6203                     }
6204                 }
6205
6206                 /* Guess harder when madskills require "best effort". */
6207                 if (PL_madskills && (!gv || !GvCVu(gv))) {
6208                     int probable_sub = 0;
6209                     if (strchr("\"'`$@%0123456789!*+{[<", *s))
6210                         probable_sub = 1;
6211                     else if (isALPHA(*s)) {
6212                         char tmpbuf[1024];
6213                         STRLEN tmplen;
6214                         d = s;
6215                         d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
6216                         if (!keyword(tmpbuf, tmplen, 0))
6217                             probable_sub = 1;
6218                         else {
6219                             while (d < PL_bufend && isSPACE(*d))
6220                                 d++;
6221                             if (*d == '=' && d[1] == '>')
6222                                 probable_sub = 1;
6223                         }
6224                     }
6225                     if (probable_sub) {
6226                         gv = gv_fetchpv(PL_tokenbuf, GV_ADD, SVt_PVCV);
6227                         op_free(pl_yylval.opval);
6228                         pl_yylval.opval = rv2cv_op;
6229                         pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
6230                         PL_last_lop = PL_oldbufptr;
6231                         PL_last_lop_op = OP_ENTERSUB;
6232                         PL_nextwhite = PL_thiswhite;
6233                         PL_thiswhite = 0;
6234                         start_force(PL_curforce);
6235                         NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6236                         PL_expect = XTERM;
6237                         PL_nextwhite = nextPL_nextwhite;
6238                         curmad('X', PL_thistoken);
6239                         PL_thistoken = newSVpvs("");
6240                         force_next(WORD);
6241                         TOKEN(NOAMP);
6242                     }
6243 #else
6244                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6245                     PL_expect = XTERM;
6246                     force_next(WORD);
6247                     TOKEN(NOAMP);
6248 #endif
6249                 }
6250
6251                 /* Call it a bare word */
6252
6253                 if (PL_hints & HINT_STRICT_SUBS)
6254                     pl_yylval.opval->op_private |= OPpCONST_STRICT;
6255                 else {
6256                 bareword:
6257                     /* after "print" and similar functions (corresponding to
6258                      * "F? L" in opcode.pl), whatever wasn't already parsed as
6259                      * a filehandle should be subject to "strict subs".
6260                      * Likewise for the optional indirect-object argument to system
6261                      * or exec, which can't be a bareword */
6262                     if ((PL_last_lop_op == OP_PRINT
6263                             || PL_last_lop_op == OP_PRTF
6264                             || PL_last_lop_op == OP_SAY
6265                             || PL_last_lop_op == OP_SYSTEM
6266                             || PL_last_lop_op == OP_EXEC)
6267                             && (PL_hints & HINT_STRICT_SUBS))
6268                         pl_yylval.opval->op_private |= OPpCONST_STRICT;
6269                     if (lastchar != '-') {
6270                         if (ckWARN(WARN_RESERVED)) {
6271                             d = PL_tokenbuf;
6272                             while (isLOWER(*d))
6273                                 d++;
6274                             if (!*d && !gv_stashpv(PL_tokenbuf, 0))
6275                                 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
6276                                        PL_tokenbuf);
6277                         }
6278                     }
6279                 }
6280                 op_free(rv2cv_op);
6281
6282             safe_bareword:
6283                 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
6284                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6285                                      "Operator or semicolon missing before %c%s",
6286                                      lastchar, PL_tokenbuf);
6287                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6288                                      "Ambiguous use of %c resolved as operator %c",
6289                                      lastchar, lastchar);
6290                 }
6291                 TOKEN(WORD);
6292             }
6293
6294         case KEY___FILE__:
6295             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6296                                         newSVpv(CopFILE(PL_curcop),0));
6297             TERM(THING);
6298
6299         case KEY___LINE__:
6300             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6301                                     Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
6302             TERM(THING);
6303
6304         case KEY___PACKAGE__:
6305             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6306                                         (PL_curstash
6307                                          ? newSVhek(HvNAME_HEK(PL_curstash))
6308                                          : &PL_sv_undef));
6309             TERM(THING);
6310
6311         case KEY___DATA__:
6312         case KEY___END__: {
6313             GV *gv;
6314             if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
6315                 const char *pname = "main";
6316                 if (PL_tokenbuf[2] == 'D')
6317                     pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
6318                 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), GV_ADD,
6319                                 SVt_PVIO);
6320                 GvMULTI_on(gv);
6321                 if (!GvIO(gv))
6322                     GvIOp(gv) = newIO();
6323                 IoIFP(GvIOp(gv)) = PL_rsfp;
6324 #if defined(HAS_FCNTL) && defined(F_SETFD)
6325                 {
6326                     const int fd = PerlIO_fileno(PL_rsfp);
6327                     fcntl(fd,F_SETFD,fd >= 3);
6328                 }
6329 #endif
6330                 /* Mark this internal pseudo-handle as clean */
6331                 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
6332                 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
6333                     IoTYPE(GvIOp(gv)) = IoTYPE_STD;
6334                 else
6335                     IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
6336 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
6337                 /* if the script was opened in binmode, we need to revert
6338                  * it to text mode for compatibility; but only iff it has CRs
6339                  * XXX this is a questionable hack at best. */
6340                 if (PL_bufend-PL_bufptr > 2
6341                     && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
6342                 {
6343                     Off_t loc = 0;
6344                     if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
6345                         loc = PerlIO_tell(PL_rsfp);
6346                         (void)PerlIO_seek(PL_rsfp, 0L, 0);
6347                     }
6348 #ifdef NETWARE
6349                         if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
6350 #else
6351                     if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
6352 #endif  /* NETWARE */
6353 #ifdef PERLIO_IS_STDIO /* really? */
6354 #  if defined(__BORLANDC__)
6355                         /* XXX see note in do_binmode() */
6356                         ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
6357 #  endif
6358 #endif
6359                         if (loc > 0)
6360                             PerlIO_seek(PL_rsfp, loc, 0);
6361                     }
6362                 }
6363 #endif
6364 #ifdef PERLIO_LAYERS
6365                 if (!IN_BYTES) {
6366                     if (UTF)
6367                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
6368                     else if (PL_encoding) {
6369                         SV *name;
6370                         dSP;
6371                         ENTER;
6372                         SAVETMPS;
6373                         PUSHMARK(sp);
6374                         EXTEND(SP, 1);
6375                         XPUSHs(PL_encoding);
6376                         PUTBACK;
6377                         call_method("name", G_SCALAR);
6378                         SPAGAIN;
6379                         name = POPs;
6380                         PUTBACK;
6381                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
6382                                             Perl_form(aTHX_ ":encoding(%"SVf")",
6383                                                       SVfARG(name)));
6384                         FREETMPS;
6385                         LEAVE;
6386                     }
6387                 }
6388 #endif
6389 #ifdef PERL_MAD
6390                 if (PL_madskills) {
6391                     if (PL_realtokenstart >= 0) {
6392                         char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
6393                         if (!PL_endwhite)
6394                             PL_endwhite = newSVpvs("");
6395                         sv_catsv(PL_endwhite, PL_thiswhite);
6396                         PL_thiswhite = 0;
6397                         sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
6398                         PL_realtokenstart = -1;
6399                     }
6400                     while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
6401                            != NULL) ;
6402                 }
6403 #endif
6404                 PL_rsfp = NULL;
6405             }
6406             goto fake_eof;
6407         }
6408
6409         case KEY_AUTOLOAD:
6410         case KEY_DESTROY:
6411         case KEY_BEGIN:
6412         case KEY_UNITCHECK:
6413         case KEY_CHECK:
6414         case KEY_INIT:
6415         case KEY_END:
6416             if (PL_expect == XSTATE) {
6417                 s = PL_bufptr;
6418                 goto really_sub;
6419             }
6420             goto just_a_word;
6421
6422         case KEY_CORE:
6423             if (*s == ':' && s[1] == ':') {
6424                 s += 2;
6425                 d = s;
6426                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6427                 if (!(tmp = keyword(PL_tokenbuf, len, 0)))
6428                     Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
6429                 if (tmp < 0)
6430                     tmp = -tmp;
6431                 else if (tmp == KEY_require || tmp == KEY_do)
6432                     /* that's a way to remember we saw "CORE::" */
6433                     orig_keyword = tmp;
6434                 goto reserved_word;
6435             }
6436             goto just_a_word;
6437
6438         case KEY_abs:
6439             UNI(OP_ABS);
6440
6441         case KEY_alarm:
6442             UNI(OP_ALARM);
6443
6444         case KEY_accept:
6445             LOP(OP_ACCEPT,XTERM);
6446
6447         case KEY_and:
6448             OPERATOR(ANDOP);
6449
6450         case KEY_atan2:
6451             LOP(OP_ATAN2,XTERM);
6452
6453         case KEY_bind:
6454             LOP(OP_BIND,XTERM);
6455
6456         case KEY_binmode:
6457             LOP(OP_BINMODE,XTERM);
6458
6459         case KEY_bless:
6460             LOP(OP_BLESS,XTERM);
6461
6462         case KEY_break:
6463             FUN0(OP_BREAK);
6464
6465         case KEY_chop:
6466             UNI(OP_CHOP);
6467
6468         case KEY_continue:
6469             /* When 'use switch' is in effect, continue has a dual
6470                life as a control operator. */
6471             {
6472                 if (!FEATURE_IS_ENABLED("switch"))
6473                     PREBLOCK(CONTINUE);
6474                 else {
6475                     /* We have to disambiguate the two senses of
6476                       "continue". If the next token is a '{' then
6477                       treat it as the start of a continue block;
6478                       otherwise treat it as a control operator.
6479                      */
6480                     s = skipspace(s);
6481                     if (*s == '{')
6482             PREBLOCK(CONTINUE);
6483                     else
6484                         FUN0(OP_CONTINUE);
6485                 }
6486             }
6487
6488         case KEY_chdir:
6489             /* may use HOME */
6490             (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
6491             UNI(OP_CHDIR);
6492
6493         case KEY_close:
6494             UNI(OP_CLOSE);
6495
6496         case KEY_closedir:
6497             UNI(OP_CLOSEDIR);
6498
6499         case KEY_cmp:
6500             Eop(OP_SCMP);
6501
6502         case KEY_caller:
6503             UNI(OP_CALLER);
6504
6505         case KEY_crypt:
6506 #ifdef FCRYPT
6507             if (!PL_cryptseen) {
6508                 PL_cryptseen = TRUE;
6509                 init_des();
6510             }
6511 #endif
6512             LOP(OP_CRYPT,XTERM);
6513
6514         case KEY_chmod:
6515             LOP(OP_CHMOD,XTERM);
6516
6517         case KEY_chown:
6518             LOP(OP_CHOWN,XTERM);
6519
6520         case KEY_connect:
6521             LOP(OP_CONNECT,XTERM);
6522
6523         case KEY_chr:
6524             UNI(OP_CHR);
6525
6526         case KEY_cos:
6527             UNI(OP_COS);
6528
6529         case KEY_chroot:
6530             UNI(OP_CHROOT);
6531
6532         case KEY_default:
6533             PREBLOCK(DEFAULT);
6534
6535         case KEY_do:
6536             s = SKIPSPACE1(s);
6537             if (*s == '{')
6538                 PRETERMBLOCK(DO);
6539             if (*s != '\'')
6540                 s = force_word(s,WORD,TRUE,TRUE,FALSE);
6541             if (orig_keyword == KEY_do) {
6542                 orig_keyword = 0;
6543                 pl_yylval.ival = 1;
6544             }
6545             else
6546                 pl_yylval.ival = 0;
6547             OPERATOR(DO);
6548
6549         case KEY_die:
6550             PL_hints |= HINT_BLOCK_SCOPE;
6551             LOP(OP_DIE,XTERM);
6552
6553         case KEY_defined:
6554             UNI(OP_DEFINED);
6555
6556         case KEY_delete:
6557             UNI(OP_DELETE);
6558
6559         case KEY_dbmopen:
6560             gv_fetchpvs("AnyDBM_File::ISA", GV_ADDMULTI, SVt_PVAV);
6561             LOP(OP_DBMOPEN,XTERM);
6562
6563         case KEY_dbmclose:
6564             UNI(OP_DBMCLOSE);
6565
6566         case KEY_dump:
6567             s = force_word(s,WORD,TRUE,FALSE,FALSE);
6568             LOOPX(OP_DUMP);
6569
6570         case KEY_else:
6571             PREBLOCK(ELSE);
6572
6573         case KEY_elsif:
6574             pl_yylval.ival = CopLINE(PL_curcop);
6575             OPERATOR(ELSIF);
6576
6577         case KEY_eq:
6578             Eop(OP_SEQ);
6579
6580         case KEY_exists:
6581             UNI(OP_EXISTS);
6582
6583         case KEY_exit:
6584             if (PL_madskills)
6585                 UNI(OP_INT);
6586             UNI(OP_EXIT);
6587
6588         case KEY_eval:
6589             s = SKIPSPACE1(s);
6590             PL_expect = (*s == '{') ? XTERMBLOCK : XTERM;
6591             UNIBRACK(OP_ENTEREVAL);
6592
6593         case KEY_eof:
6594             UNI(OP_EOF);
6595
6596         case KEY_exp:
6597             UNI(OP_EXP);
6598
6599         case KEY_each:
6600             UNI(OP_EACH);
6601
6602         case KEY_exec:
6603             LOP(OP_EXEC,XREF);
6604
6605         case KEY_endhostent:
6606             FUN0(OP_EHOSTENT);
6607
6608         case KEY_endnetent:
6609             FUN0(OP_ENETENT);
6610
6611         case KEY_endservent:
6612             FUN0(OP_ESERVENT);
6613
6614         case KEY_endprotoent:
6615             FUN0(OP_EPROTOENT);
6616
6617         case KEY_endpwent:
6618             FUN0(OP_EPWENT);
6619
6620         case KEY_endgrent:
6621             FUN0(OP_EGRENT);
6622
6623         case KEY_for:
6624         case KEY_foreach:
6625             pl_yylval.ival = CopLINE(PL_curcop);
6626             s = SKIPSPACE1(s);
6627             if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
6628                 char *p = s;
6629 #ifdef PERL_MAD
6630                 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
6631 #endif
6632
6633                 if ((PL_bufend - p) >= 3 &&
6634                     strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
6635                     p += 2;
6636                 else if ((PL_bufend - p) >= 4 &&
6637                     strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
6638                     p += 3;
6639                 p = PEEKSPACE(p);
6640                 if (isIDFIRST_lazy_if(p,UTF)) {
6641                     p = scan_ident(p, PL_bufend,
6642                         PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
6643                     p = PEEKSPACE(p);
6644                 }
6645                 if (*p != '$')
6646                     Perl_croak(aTHX_ "Missing $ on loop variable");
6647 #ifdef PERL_MAD
6648                 s = SvPVX(PL_linestr) + soff;
6649 #endif
6650             }
6651             OPERATOR(FOR);
6652
6653         case KEY_formline:
6654             LOP(OP_FORMLINE,XTERM);
6655
6656         case KEY_fork:
6657             FUN0(OP_FORK);
6658
6659         case KEY_fcntl:
6660             LOP(OP_FCNTL,XTERM);
6661
6662         case KEY_fileno:
6663             UNI(OP_FILENO);
6664
6665         case KEY_flock:
6666             LOP(OP_FLOCK,XTERM);
6667
6668         case KEY_gt:
6669             Rop(OP_SGT);
6670
6671         case KEY_ge:
6672             Rop(OP_SGE);
6673
6674         case KEY_grep:
6675             LOP(OP_GREPSTART, XREF);
6676
6677         case KEY_goto:
6678             s = force_word(s,WORD,TRUE,FALSE,FALSE);
6679             LOOPX(OP_GOTO);
6680
6681         case KEY_gmtime:
6682             UNI(OP_GMTIME);
6683
6684         case KEY_getc:
6685             UNIDOR(OP_GETC);
6686
6687         case KEY_getppid:
6688             FUN0(OP_GETPPID);
6689
6690         case KEY_getpgrp:
6691             UNI(OP_GETPGRP);
6692
6693         case KEY_getpriority:
6694             LOP(OP_GETPRIORITY,XTERM);
6695
6696         case KEY_getprotobyname:
6697             UNI(OP_GPBYNAME);
6698
6699         case KEY_getprotobynumber:
6700             LOP(OP_GPBYNUMBER,XTERM);
6701
6702         case KEY_getprotoent:
6703             FUN0(OP_GPROTOENT);
6704
6705         case KEY_getpwent:
6706             FUN0(OP_GPWENT);
6707
6708         case KEY_getpwnam:
6709             UNI(OP_GPWNAM);
6710
6711         case KEY_getpwuid:
6712             UNI(OP_GPWUID);
6713
6714         case KEY_getpeername:
6715             UNI(OP_GETPEERNAME);
6716
6717         case KEY_gethostbyname:
6718             UNI(OP_GHBYNAME);
6719
6720         case KEY_gethostbyaddr:
6721             LOP(OP_GHBYADDR,XTERM);
6722
6723         case KEY_gethostent:
6724             FUN0(OP_GHOSTENT);
6725
6726         case KEY_getnetbyname:
6727             UNI(OP_GNBYNAME);
6728
6729         case KEY_getnetbyaddr:
6730             LOP(OP_GNBYADDR,XTERM);
6731
6732         case KEY_getnetent:
6733             FUN0(OP_GNETENT);
6734
6735         case KEY_getservbyname:
6736             LOP(OP_GSBYNAME,XTERM);
6737
6738         case KEY_getservbyport:
6739             LOP(OP_GSBYPORT,XTERM);
6740
6741         case KEY_getservent:
6742             FUN0(OP_GSERVENT);
6743
6744         case KEY_getsockname:
6745             UNI(OP_GETSOCKNAME);
6746
6747         case KEY_getsockopt:
6748             LOP(OP_GSOCKOPT,XTERM);
6749
6750         case KEY_getgrent:
6751             FUN0(OP_GGRENT);
6752
6753         case KEY_getgrnam:
6754             UNI(OP_GGRNAM);
6755
6756         case KEY_getgrgid:
6757             UNI(OP_GGRGID);
6758
6759         case KEY_getlogin:
6760             FUN0(OP_GETLOGIN);
6761
6762         case KEY_given:
6763             pl_yylval.ival = CopLINE(PL_curcop);
6764             OPERATOR(GIVEN);
6765
6766         case KEY_glob:
6767             LOP(OP_GLOB,XTERM);
6768
6769         case KEY_hex:
6770             UNI(OP_HEX);
6771
6772         case KEY_if:
6773             pl_yylval.ival = CopLINE(PL_curcop);
6774             OPERATOR(IF);
6775
6776         case KEY_index:
6777             LOP(OP_INDEX,XTERM);
6778
6779         case KEY_int:
6780             UNI(OP_INT);
6781
6782         case KEY_ioctl:
6783             LOP(OP_IOCTL,XTERM);
6784
6785         case KEY_join:
6786             LOP(OP_JOIN,XTERM);
6787
6788         case KEY_keys:
6789             UNI(OP_KEYS);
6790
6791         case KEY_kill:
6792             LOP(OP_KILL,XTERM);
6793
6794         case KEY_last:
6795             s = force_word(s,WORD,TRUE,FALSE,FALSE);
6796             LOOPX(OP_LAST);
6797
6798         case KEY_lc:
6799             UNI(OP_LC);
6800
6801         case KEY_lcfirst:
6802             UNI(OP_LCFIRST);
6803
6804         case KEY_local:
6805             pl_yylval.ival = 0;
6806             OPERATOR(LOCAL);
6807
6808         case KEY_length:
6809             UNI(OP_LENGTH);
6810
6811         case KEY_lt:
6812             Rop(OP_SLT);
6813
6814         case KEY_le:
6815             Rop(OP_SLE);
6816
6817         case KEY_localtime:
6818             UNI(OP_LOCALTIME);
6819
6820         case KEY_log:
6821             UNI(OP_LOG);
6822
6823         case KEY_link:
6824             LOP(OP_LINK,XTERM);
6825
6826         case KEY_listen:
6827             LOP(OP_LISTEN,XTERM);
6828
6829         case KEY_lock:
6830             UNI(OP_LOCK);
6831
6832         case KEY_lstat:
6833             UNI(OP_LSTAT);
6834
6835         case KEY_m:
6836             s = scan_pat(s,OP_MATCH);
6837             TERM(sublex_start());
6838
6839         case KEY_map:
6840             LOP(OP_MAPSTART, XREF);
6841
6842         case KEY_mkdir:
6843             LOP(OP_MKDIR,XTERM);
6844
6845         case KEY_msgctl:
6846             LOP(OP_MSGCTL,XTERM);
6847
6848         case KEY_msgget:
6849             LOP(OP_MSGGET,XTERM);
6850
6851         case KEY_msgrcv:
6852             LOP(OP_MSGRCV,XTERM);
6853
6854         case KEY_msgsnd:
6855             LOP(OP_MSGSND,XTERM);
6856
6857         case KEY_our:
6858         case KEY_my:
6859         case KEY_state:
6860             PL_in_my = (U16)tmp;
6861             s = SKIPSPACE1(s);
6862             if (isIDFIRST_lazy_if(s,UTF)) {
6863 #ifdef PERL_MAD
6864                 char* start = s;
6865 #endif
6866                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
6867                 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
6868                     goto really_sub;
6869                 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
6870                 if (!PL_in_my_stash) {
6871                     char tmpbuf[1024];
6872                     PL_bufptr = s;
6873                     my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
6874                     yyerror(tmpbuf);
6875                 }
6876 #ifdef PERL_MAD
6877                 if (PL_madskills) {     /* just add type to declarator token */
6878                     sv_catsv(PL_thistoken, PL_nextwhite);
6879                     PL_nextwhite = 0;
6880                     sv_catpvn(PL_thistoken, start, s - start);
6881                 }
6882 #endif
6883             }
6884             pl_yylval.ival = 1;
6885             OPERATOR(MY);
6886
6887         case KEY_next:
6888             s = force_word(s,WORD,TRUE,FALSE,FALSE);
6889             LOOPX(OP_NEXT);
6890
6891         case KEY_ne:
6892             Eop(OP_SNE);
6893
6894         case KEY_no:
6895             s = tokenize_use(0, s);
6896             OPERATOR(USE);
6897
6898         case KEY_not:
6899             if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
6900                 FUN1(OP_NOT);
6901             else
6902                 OPERATOR(NOTOP);
6903
6904         case KEY_open:
6905             s = SKIPSPACE1(s);
6906             if (isIDFIRST_lazy_if(s,UTF)) {
6907                 const char *t;
6908                 for (d = s; isALNUM_lazy_if(d,UTF);)
6909                     d++;
6910                 for (t=d; isSPACE(*t);)
6911                     t++;
6912                 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
6913                     /* [perl #16184] */
6914                     && !(t[0] == '=' && t[1] == '>')
6915                 ) {
6916                     int parms_len = (int)(d-s);
6917                     Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
6918                            "Precedence problem: open %.*s should be open(%.*s)",
6919                             parms_len, s, parms_len, s);
6920                 }
6921             }
6922             LOP(OP_OPEN,XTERM);
6923
6924         case KEY_or:
6925             pl_yylval.ival = OP_OR;
6926             OPERATOR(OROP);
6927
6928         case KEY_ord:
6929             UNI(OP_ORD);
6930
6931         case KEY_oct:
6932             UNI(OP_OCT);
6933
6934         case KEY_opendir:
6935             LOP(OP_OPEN_DIR,XTERM);
6936
6937         case KEY_print:
6938             checkcomma(s,PL_tokenbuf,"filehandle");
6939             LOP(OP_PRINT,XREF);
6940
6941         case KEY_printf:
6942             checkcomma(s,PL_tokenbuf,"filehandle");
6943             LOP(OP_PRTF,XREF);
6944
6945         case KEY_prototype:
6946             UNI(OP_PROTOTYPE);
6947
6948         case KEY_push:
6949             LOP(OP_PUSH,XTERM);
6950
6951         case KEY_pop:
6952             UNIDOR(OP_POP);
6953
6954         case KEY_pos:
6955             UNIDOR(OP_POS);
6956
6957         case KEY_pack:
6958             LOP(OP_PACK,XTERM);
6959
6960         case KEY_package:
6961             s = force_word(s,WORD,FALSE,TRUE,FALSE);
6962             s = force_version(s, FALSE);
6963             OPERATOR(PACKAGE);
6964
6965         case KEY_pipe:
6966             LOP(OP_PIPE_OP,XTERM);
6967
6968         case KEY_q:
6969             s = scan_str(s,!!PL_madskills,FALSE);
6970             if (!s)
6971                 missingterm(NULL);
6972             pl_yylval.ival = OP_CONST;
6973             TERM(sublex_start());
6974
6975         case KEY_quotemeta:
6976             UNI(OP_QUOTEMETA);
6977
6978         case KEY_qw:
6979             s = scan_str(s,!!PL_madskills,FALSE);
6980             if (!s)
6981                 missingterm(NULL);
6982             PL_expect = XOPERATOR;
6983             force_next(')');
6984             if (SvCUR(PL_lex_stuff)) {
6985                 OP *words = NULL;
6986                 int warned = 0;
6987                 d = SvPV_force(PL_lex_stuff, len);
6988                 while (len) {
6989                     for (; isSPACE(*d) && len; --len, ++d)
6990                         /**/;
6991                     if (len) {
6992                         SV *sv;
6993                         const char *b = d;
6994                         if (!warned && ckWARN(WARN_QW)) {
6995                             for (; !isSPACE(*d) && len; --len, ++d) {
6996                                 if (*d == ',') {
6997                                     Perl_warner(aTHX_ packWARN(WARN_QW),
6998                                         "Possible attempt to separate words with commas");
6999                                     ++warned;
7000                                 }
7001                                 else if (*d == '#') {
7002                                     Perl_warner(aTHX_ packWARN(WARN_QW),
7003                                         "Possible attempt to put comments in qw() list");
7004                                     ++warned;
7005                                 }
7006                             }
7007                         }
7008                         else {
7009                             for (; !isSPACE(*d) && len; --len, ++d)
7010                                 /**/;
7011                         }
7012                         sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
7013                         words = append_elem(OP_LIST, words,
7014                                             newSVOP(OP_CONST, 0, tokeq(sv)));
7015                     }
7016                 }
7017                 if (words) {
7018                     start_force(PL_curforce);
7019                     NEXTVAL_NEXTTOKE.opval = words;
7020                     force_next(THING);
7021                 }
7022             }
7023             if (PL_lex_stuff) {
7024                 SvREFCNT_dec(PL_lex_stuff);
7025                 PL_lex_stuff = NULL;
7026             }
7027             PL_expect = XTERM;
7028             TOKEN('(');
7029
7030         case KEY_qq:
7031             s = scan_str(s,!!PL_madskills,FALSE);
7032             if (!s)
7033                 missingterm(NULL);
7034             pl_yylval.ival = OP_STRINGIFY;
7035             if (SvIVX(PL_lex_stuff) == '\'')
7036                 SvIV_set(PL_lex_stuff, 0);      /* qq'$foo' should intepolate */
7037             TERM(sublex_start());
7038
7039         case KEY_qr:
7040             s = scan_pat(s,OP_QR);
7041             TERM(sublex_start());
7042
7043         case KEY_qx:
7044             s = scan_str(s,!!PL_madskills,FALSE);
7045             if (!s)
7046                 missingterm(NULL);
7047             readpipe_override();
7048             TERM(sublex_start());
7049
7050         case KEY_return:
7051             OLDLOP(OP_RETURN);
7052
7053         case KEY_require:
7054             s = SKIPSPACE1(s);
7055             if (isDIGIT(*s)) {
7056                 s = force_version(s, FALSE);
7057             }
7058             else if (*s != 'v' || !isDIGIT(s[1])
7059                     || (s = force_version(s, TRUE), *s == 'v'))
7060             {
7061                 *PL_tokenbuf = '\0';
7062                 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7063                 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
7064                     gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), GV_ADD);
7065                 else if (*s == '<')
7066                     yyerror("<> should be quotes");
7067             }
7068             if (orig_keyword == KEY_require) {
7069                 orig_keyword = 0;
7070                 pl_yylval.ival = 1;
7071             }
7072             else
7073                 pl_yylval.ival = 0;
7074             PL_expect = XTERM;
7075             PL_bufptr = s;
7076             PL_last_uni = PL_oldbufptr;
7077             PL_last_lop_op = OP_REQUIRE;
7078             s = skipspace(s);
7079             return REPORT( (int)REQUIRE );
7080
7081         case KEY_reset:
7082             UNI(OP_RESET);
7083
7084         case KEY_redo:
7085             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7086             LOOPX(OP_REDO);
7087
7088         case KEY_rename:
7089             LOP(OP_RENAME,XTERM);
7090
7091         case KEY_rand:
7092             UNI(OP_RAND);
7093
7094         case KEY_rmdir:
7095             UNI(OP_RMDIR);
7096
7097         case KEY_rindex:
7098             LOP(OP_RINDEX,XTERM);
7099
7100         case KEY_read:
7101             LOP(OP_READ,XTERM);
7102
7103         case KEY_readdir:
7104             UNI(OP_READDIR);
7105
7106         case KEY_readline:
7107             UNIDOR(OP_READLINE);
7108
7109         case KEY_readpipe:
7110             UNIDOR(OP_BACKTICK);
7111
7112         case KEY_rewinddir:
7113             UNI(OP_REWINDDIR);
7114
7115         case KEY_recv:
7116             LOP(OP_RECV,XTERM);
7117
7118         case KEY_reverse:
7119             LOP(OP_REVERSE,XTERM);
7120
7121         case KEY_readlink:
7122             UNIDOR(OP_READLINK);
7123
7124         case KEY_ref:
7125             UNI(OP_REF);
7126
7127         case KEY_s:
7128             s = scan_subst(s);
7129             if (pl_yylval.opval)
7130                 TERM(sublex_start());
7131             else
7132                 TOKEN(1);       /* force error */
7133
7134         case KEY_say:
7135             checkcomma(s,PL_tokenbuf,"filehandle");
7136             LOP(OP_SAY,XREF);
7137
7138         case KEY_chomp:
7139             UNI(OP_CHOMP);
7140
7141         case KEY_scalar:
7142             UNI(OP_SCALAR);
7143
7144         case KEY_select:
7145             LOP(OP_SELECT,XTERM);
7146
7147         case KEY_seek:
7148             LOP(OP_SEEK,XTERM);
7149
7150         case KEY_semctl:
7151             LOP(OP_SEMCTL,XTERM);
7152
7153         case KEY_semget:
7154             LOP(OP_SEMGET,XTERM);
7155
7156         case KEY_semop:
7157             LOP(OP_SEMOP,XTERM);
7158
7159         case KEY_send:
7160             LOP(OP_SEND,XTERM);
7161
7162         case KEY_setpgrp:
7163             LOP(OP_SETPGRP,XTERM);
7164
7165         case KEY_setpriority:
7166             LOP(OP_SETPRIORITY,XTERM);
7167
7168         case KEY_sethostent:
7169             UNI(OP_SHOSTENT);
7170
7171         case KEY_setnetent:
7172             UNI(OP_SNETENT);
7173
7174         case KEY_setservent:
7175             UNI(OP_SSERVENT);
7176
7177         case KEY_setprotoent:
7178             UNI(OP_SPROTOENT);
7179
7180         case KEY_setpwent:
7181             FUN0(OP_SPWENT);
7182
7183         case KEY_setgrent:
7184             FUN0(OP_SGRENT);
7185
7186         case KEY_seekdir:
7187             LOP(OP_SEEKDIR,XTERM);
7188
7189         case KEY_setsockopt:
7190             LOP(OP_SSOCKOPT,XTERM);
7191
7192         case KEY_shift:
7193             UNIDOR(OP_SHIFT);
7194
7195         case KEY_shmctl:
7196             LOP(OP_SHMCTL,XTERM);
7197
7198         case KEY_shmget:
7199             LOP(OP_SHMGET,XTERM);
7200
7201         case KEY_shmread:
7202             LOP(OP_SHMREAD,XTERM);
7203
7204         case KEY_shmwrite:
7205             LOP(OP_SHMWRITE,XTERM);
7206
7207         case KEY_shutdown:
7208             LOP(OP_SHUTDOWN,XTERM);
7209
7210         case KEY_sin:
7211             UNI(OP_SIN);
7212
7213         case KEY_sleep:
7214             UNI(OP_SLEEP);
7215
7216         case KEY_socket:
7217             LOP(OP_SOCKET,XTERM);
7218
7219         case KEY_socketpair:
7220             LOP(OP_SOCKPAIR,XTERM);
7221
7222         case KEY_sort:
7223             checkcomma(s,PL_tokenbuf,"subroutine name");
7224             s = SKIPSPACE1(s);
7225             if (*s == ';' || *s == ')')         /* probably a close */
7226                 Perl_croak(aTHX_ "sort is now a reserved word");
7227             PL_expect = XTERM;
7228             s = force_word(s,WORD,TRUE,TRUE,FALSE);
7229             LOP(OP_SORT,XREF);
7230
7231         case KEY_split:
7232             LOP(OP_SPLIT,XTERM);
7233
7234         case KEY_sprintf:
7235             LOP(OP_SPRINTF,XTERM);
7236
7237         case KEY_splice:
7238             LOP(OP_SPLICE,XTERM);
7239
7240         case KEY_sqrt:
7241             UNI(OP_SQRT);
7242
7243         case KEY_srand:
7244             UNI(OP_SRAND);
7245
7246         case KEY_stat:
7247             UNI(OP_STAT);
7248
7249         case KEY_study:
7250             UNI(OP_STUDY);
7251
7252         case KEY_substr:
7253             LOP(OP_SUBSTR,XTERM);
7254
7255         case KEY_format:
7256         case KEY_sub:
7257           really_sub:
7258             {
7259                 char tmpbuf[sizeof PL_tokenbuf];
7260                 SSize_t tboffset = 0;
7261                 expectation attrful;
7262                 bool have_name, have_proto;
7263                 const int key = tmp;
7264
7265 #ifdef PERL_MAD
7266                 SV *tmpwhite = 0;
7267
7268                 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7269                 SV *subtoken = newSVpvn(tstart, s - tstart);
7270                 PL_thistoken = 0;
7271
7272                 d = s;
7273                 s = SKIPSPACE2(s,tmpwhite);
7274 #else
7275                 s = skipspace(s);
7276 #endif
7277
7278                 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
7279                     (*s == ':' && s[1] == ':'))
7280                 {
7281 #ifdef PERL_MAD
7282                     SV *nametoke = NULL;
7283 #endif
7284
7285                     PL_expect = XBLOCK;
7286                     attrful = XATTRBLOCK;
7287                     /* remember buffer pos'n for later force_word */
7288                     tboffset = s - PL_oldbufptr;
7289                     d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
7290 #ifdef PERL_MAD
7291                     if (PL_madskills)
7292                         nametoke = newSVpvn(s, d - s);
7293 #endif
7294                     if (memchr(tmpbuf, ':', len))
7295                         sv_setpvn(PL_subname, tmpbuf, len);
7296                     else {
7297                         sv_setsv(PL_subname,PL_curstname);
7298                         sv_catpvs(PL_subname,"::");
7299                         sv_catpvn(PL_subname,tmpbuf,len);
7300                     }
7301                     have_name = TRUE;
7302
7303 #ifdef PERL_MAD
7304
7305                     start_force(0);
7306                     CURMAD('X', nametoke);
7307                     CURMAD('_', tmpwhite);
7308                     (void) force_word(PL_oldbufptr + tboffset, WORD,
7309                                       FALSE, TRUE, TRUE);
7310
7311                     s = SKIPSPACE2(d,tmpwhite);
7312 #else
7313                     s = skipspace(d);
7314 #endif
7315                 }
7316                 else {
7317                     if (key == KEY_my)
7318                         Perl_croak(aTHX_ "Missing name in \"my sub\"");
7319                     PL_expect = XTERMBLOCK;
7320                     attrful = XATTRTERM;
7321                     sv_setpvs(PL_subname,"?");
7322                     have_name = FALSE;
7323                 }
7324
7325                 if (key == KEY_format) {
7326                     if (*s == '=')
7327                         PL_lex_formbrack = PL_lex_brackets + 1;
7328 #ifdef PERL_MAD
7329                     PL_thistoken = subtoken;
7330                     s = d;
7331 #else
7332                     if (have_name)
7333                         (void) force_word(PL_oldbufptr + tboffset, WORD,
7334                                           FALSE, TRUE, TRUE);
7335 #endif
7336                     OPERATOR(FORMAT);
7337                 }
7338
7339                 /* Look for a prototype */
7340                 if (*s == '(') {
7341                     char *p;
7342                     bool bad_proto = FALSE;
7343                     bool in_brackets = FALSE;
7344                     char greedy_proto = ' ';
7345                     bool proto_after_greedy_proto = FALSE;
7346                     bool must_be_last = FALSE;
7347                     bool underscore = FALSE;
7348                     bool seen_underscore = FALSE;
7349                     const bool warnsyntax = ckWARN(WARN_SYNTAX);
7350
7351                     s = scan_str(s,!!PL_madskills,FALSE);
7352                     if (!s)
7353                         Perl_croak(aTHX_ "Prototype not terminated");
7354                     /* strip spaces and check for bad characters */
7355                     d = SvPVX(PL_lex_stuff);
7356                     tmp = 0;
7357                     for (p = d; *p; ++p) {
7358                         if (!isSPACE(*p)) {
7359                             d[tmp++] = *p;
7360
7361                             if (warnsyntax) {
7362                                 if (must_be_last)
7363                                     proto_after_greedy_proto = TRUE;
7364                                 if (!strchr("$@%*;[]&\\_", *p)) {
7365                                     bad_proto = TRUE;
7366                                 }
7367                                 else {
7368                                     if ( underscore ) {
7369                                         if ( *p != ';' )
7370                                             bad_proto = TRUE;
7371                                         underscore = FALSE;
7372                                     }
7373                                     if ( *p == '[' ) {
7374                                         in_brackets = TRUE;
7375                                     }
7376                                     else if ( *p == ']' ) {
7377                                         in_brackets = FALSE;
7378                                     }
7379                                     else if ( (*p == '@' || *p == '%') &&
7380                                          ( tmp < 2 || d[tmp-2] != '\\' ) &&
7381                                          !in_brackets ) {
7382                                         must_be_last = TRUE;
7383                                         greedy_proto = *p;
7384                                     }
7385                                     else if ( *p == '_' ) {
7386                                         underscore = seen_underscore = TRUE;
7387                                     }
7388                                 }
7389                             }
7390                         }
7391                     }
7392                     d[tmp] = '\0';
7393                     if (proto_after_greedy_proto)
7394                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
7395                                     "Prototype after '%c' for %"SVf" : %s",
7396                                     greedy_proto, SVfARG(PL_subname), d);
7397                     if (bad_proto)
7398                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
7399                                     "Illegal character %sin prototype for %"SVf" : %s",
7400                                     seen_underscore ? "after '_' " : "",
7401                                     SVfARG(PL_subname), d);
7402                     SvCUR_set(PL_lex_stuff, tmp);
7403                     have_proto = TRUE;
7404
7405 #ifdef PERL_MAD
7406                     start_force(0);
7407                     CURMAD('q', PL_thisopen);
7408                     CURMAD('_', tmpwhite);
7409                     CURMAD('=', PL_thisstuff);
7410                     CURMAD('Q', PL_thisclose);
7411                     NEXTVAL_NEXTTOKE.opval =
7412                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
7413                     PL_lex_stuff = NULL;
7414                     force_next(THING);
7415
7416                     s = SKIPSPACE2(s,tmpwhite);
7417 #else
7418                     s = skipspace(s);
7419 #endif
7420                 }
7421                 else
7422                     have_proto = FALSE;
7423
7424                 if (*s == ':' && s[1] != ':')
7425                     PL_expect = attrful;
7426                 else if (*s != '{' && key == KEY_sub) {
7427                     if (!have_name)
7428                         Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
7429                     else if (*s != ';')
7430                         Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
7431                 }
7432
7433 #ifdef PERL_MAD
7434                 start_force(0);
7435                 if (tmpwhite) {
7436                     if (PL_madskills)
7437                         curmad('^', newSVpvs(""));
7438                     CURMAD('_', tmpwhite);
7439                 }
7440                 force_next(0);
7441
7442                 PL_thistoken = subtoken;
7443 #else
7444                 if (have_proto) {
7445                     NEXTVAL_NEXTTOKE.opval =
7446                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
7447                     PL_lex_stuff = NULL;
7448                     force_next(THING);
7449                 }
7450 #endif
7451                 if (!have_name) {
7452                     if (PL_curstash)
7453                         sv_setpvs(PL_subname, "__ANON__");
7454                     else
7455                         sv_setpvs(PL_subname, "__ANON__::__ANON__");
7456                     TOKEN(ANONSUB);
7457                 }
7458 #ifndef PERL_MAD
7459                 (void) force_word(PL_oldbufptr + tboffset, WORD,
7460                                   FALSE, TRUE, TRUE);
7461 #endif
7462                 if (key == KEY_my)
7463                     TOKEN(MYSUB);
7464                 TOKEN(SUB);
7465             }
7466
7467         case KEY_system:
7468             LOP(OP_SYSTEM,XREF);
7469
7470         case KEY_symlink:
7471             LOP(OP_SYMLINK,XTERM);
7472
7473         case KEY_syscall:
7474             LOP(OP_SYSCALL,XTERM);
7475
7476         case KEY_sysopen:
7477             LOP(OP_SYSOPEN,XTERM);
7478
7479         case KEY_sysseek:
7480             LOP(OP_SYSSEEK,XTERM);
7481
7482         case KEY_sysread:
7483             LOP(OP_SYSREAD,XTERM);
7484
7485         case KEY_syswrite:
7486             LOP(OP_SYSWRITE,XTERM);
7487
7488         case KEY_tr:
7489             s = scan_trans(s);
7490             TERM(sublex_start());
7491
7492         case KEY_tell:
7493             UNI(OP_TELL);
7494
7495         case KEY_telldir:
7496             UNI(OP_TELLDIR);
7497
7498         case KEY_tie:
7499             LOP(OP_TIE,XTERM);
7500
7501         case KEY_tied:
7502             UNI(OP_TIED);
7503
7504         case KEY_time:
7505             FUN0(OP_TIME);
7506
7507         case KEY_times:
7508             FUN0(OP_TMS);
7509
7510         case KEY_truncate:
7511             LOP(OP_TRUNCATE,XTERM);
7512
7513         case KEY_uc:
7514             UNI(OP_UC);
7515
7516         case KEY_ucfirst:
7517             UNI(OP_UCFIRST);
7518
7519         case KEY_untie:
7520             UNI(OP_UNTIE);
7521
7522         case KEY_until:
7523             pl_yylval.ival = CopLINE(PL_curcop);
7524             OPERATOR(UNTIL);
7525
7526         case KEY_unless:
7527             pl_yylval.ival = CopLINE(PL_curcop);
7528             OPERATOR(UNLESS);
7529
7530         case KEY_unlink:
7531             LOP(OP_UNLINK,XTERM);
7532
7533         case KEY_undef:
7534             UNIDOR(OP_UNDEF);
7535
7536         case KEY_unpack:
7537             LOP(OP_UNPACK,XTERM);
7538
7539         case KEY_utime:
7540             LOP(OP_UTIME,XTERM);
7541
7542         case KEY_umask:
7543             UNIDOR(OP_UMASK);
7544
7545         case KEY_unshift:
7546             LOP(OP_UNSHIFT,XTERM);
7547
7548         case KEY_use:
7549             s = tokenize_use(1, s);
7550             OPERATOR(USE);
7551
7552         case KEY_values:
7553             UNI(OP_VALUES);
7554
7555         case KEY_vec:
7556             LOP(OP_VEC,XTERM);
7557
7558         case KEY_when:
7559             pl_yylval.ival = CopLINE(PL_curcop);
7560             OPERATOR(WHEN);
7561
7562         case KEY_while:
7563             pl_yylval.ival = CopLINE(PL_curcop);
7564             OPERATOR(WHILE);
7565
7566         case KEY_warn:
7567             PL_hints |= HINT_BLOCK_SCOPE;
7568             LOP(OP_WARN,XTERM);
7569
7570         case KEY_wait:
7571             FUN0(OP_WAIT);
7572
7573         case KEY_waitpid:
7574             LOP(OP_WAITPID,XTERM);
7575
7576         case KEY_wantarray:
7577             FUN0(OP_WANTARRAY);
7578
7579         case KEY_write:
7580 #ifdef EBCDIC
7581         {
7582             char ctl_l[2];
7583             ctl_l[0] = toCTRL('L');
7584             ctl_l[1] = '\0';
7585             gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
7586         }
7587 #else
7588             /* Make sure $^L is defined */
7589             gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
7590 #endif
7591             UNI(OP_ENTERWRITE);
7592
7593         case KEY_x:
7594             if (PL_expect == XOPERATOR)
7595                 Mop(OP_REPEAT);
7596             check_uni();
7597             goto just_a_word;
7598
7599         case KEY_xor:
7600             pl_yylval.ival = OP_XOR;
7601             OPERATOR(OROP);
7602
7603         case KEY_y:
7604             s = scan_trans(s);
7605             TERM(sublex_start());
7606         }
7607     }}
7608 }
7609 #ifdef __SC__
7610 #pragma segment Main
7611 #endif
7612
7613 static int
7614 S_pending_ident(pTHX)
7615 {
7616     dVAR;
7617     register char *d;
7618     PADOFFSET tmp = 0;
7619     /* pit holds the identifier we read and pending_ident is reset */
7620     char pit = PL_pending_ident;
7621     const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
7622     /* All routes through this function want to know if there is a colon.  */
7623     const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
7624     PL_pending_ident = 0;
7625
7626     /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
7627     DEBUG_T({ PerlIO_printf(Perl_debug_log,
7628           "### Pending identifier '%s'\n", PL_tokenbuf); });
7629
7630     /* if we're in a my(), we can't allow dynamics here.
7631        $foo'bar has already been turned into $foo::bar, so
7632        just check for colons.
7633
7634        if it's a legal name, the OP is a PADANY.
7635     */
7636     if (PL_in_my) {
7637         if (PL_in_my == KEY_our) {      /* "our" is merely analogous to "my" */
7638             if (has_colon)
7639                 yyerror(Perl_form(aTHX_ "No package name allowed for "
7640                                   "variable %s in \"our\"",
7641                                   PL_tokenbuf));
7642             tmp = allocmy(PL_tokenbuf, tokenbuf_len, 0);
7643         }
7644         else {
7645             if (has_colon)
7646                 yyerror(Perl_form(aTHX_ PL_no_myglob,
7647                             PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf));
7648
7649             pl_yylval.opval = newOP(OP_PADANY, 0);
7650             pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len, 0);
7651             return PRIVATEREF;
7652         }
7653     }
7654
7655     /*
7656        build the ops for accesses to a my() variable.
7657
7658        Deny my($a) or my($b) in a sort block, *if* $a or $b is
7659        then used in a comparison.  This catches most, but not
7660        all cases.  For instance, it catches
7661            sort { my($a); $a <=> $b }
7662        but not
7663            sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
7664        (although why you'd do that is anyone's guess).
7665     */
7666
7667     if (!has_colon) {
7668         if (!PL_in_my)
7669             tmp = pad_findmy(PL_tokenbuf, tokenbuf_len, 0);
7670         if (tmp != NOT_IN_PAD) {
7671             /* might be an "our" variable" */
7672             if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
7673                 /* build ops for a bareword */
7674                 HV *  const stash = PAD_COMPNAME_OURSTASH(tmp);
7675                 HEK * const stashname = HvNAME_HEK(stash);
7676                 SV *  const sym = newSVhek(stashname);
7677                 sv_catpvs(sym, "::");
7678                 sv_catpvn(sym, PL_tokenbuf+1, tokenbuf_len - 1);
7679                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
7680                 pl_yylval.opval->op_private = OPpCONST_ENTERED;
7681                 gv_fetchsv(sym,
7682                     (PL_in_eval
7683                         ? (GV_ADDMULTI | GV_ADDINEVAL)
7684                         : GV_ADDMULTI
7685                     ),
7686                     ((PL_tokenbuf[0] == '$') ? SVt_PV
7687                      : (PL_tokenbuf[0] == '@') ? SVt_PVAV
7688                      : SVt_PVHV));
7689                 return WORD;
7690             }
7691
7692             /* if it's a sort block and they're naming $a or $b */
7693             if (PL_last_lop_op == OP_SORT &&
7694                 PL_tokenbuf[0] == '$' &&
7695                 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
7696                 && !PL_tokenbuf[2])
7697             {
7698                 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
7699                      d < PL_bufend && *d != '\n';
7700                      d++)
7701                 {
7702                     if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
7703                         Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
7704                               PL_tokenbuf);
7705                     }
7706                 }
7707             }
7708
7709             pl_yylval.opval = newOP(OP_PADANY, 0);
7710             pl_yylval.opval->op_targ = tmp;
7711             return PRIVATEREF;
7712         }
7713     }
7714
7715     /*
7716        Whine if they've said @foo in a doublequoted string,
7717        and @foo isn't a variable we can find in the symbol
7718        table.
7719     */
7720     if (ckWARN(WARN_AMBIGUOUS) &&
7721         pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
7722         GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1, 0,
7723                                          SVt_PVAV);
7724         if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
7725                 /* DO NOT warn for @- and @+ */
7726                 && !( PL_tokenbuf[2] == '\0' &&
7727                     ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
7728            )
7729         {
7730             /* Downgraded from fatal to warning 20000522 mjd */
7731             Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
7732                         "Possible unintended interpolation of %s in string",
7733                         PL_tokenbuf);
7734         }
7735     }
7736
7737     /* build ops for a bareword */
7738     pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn(PL_tokenbuf + 1,
7739                                                       tokenbuf_len - 1));
7740     pl_yylval.opval->op_private = OPpCONST_ENTERED;
7741     gv_fetchpvn_flags(
7742             PL_tokenbuf + 1, tokenbuf_len - 1,
7743             /* If the identifier refers to a stash, don't autovivify it.
7744              * Change 24660 had the side effect of causing symbol table
7745              * hashes to always be defined, even if they were freshly
7746              * created and the only reference in the entire program was
7747              * the single statement with the defined %foo::bar:: test.
7748              * It appears that all code in the wild doing this actually
7749              * wants to know whether sub-packages have been loaded, so
7750              * by avoiding auto-vivifying symbol tables, we ensure that
7751              * defined %foo::bar:: continues to be false, and the existing
7752              * tests still give the expected answers, even though what
7753              * they're actually testing has now changed subtly.
7754              */
7755             (*PL_tokenbuf == '%'
7756              && *(d = PL_tokenbuf + tokenbuf_len - 1) == ':'
7757              && d[-1] == ':'
7758              ? 0
7759              : PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD),
7760             ((PL_tokenbuf[0] == '$') ? SVt_PV
7761              : (PL_tokenbuf[0] == '@') ? SVt_PVAV
7762              : SVt_PVHV));
7763     return WORD;
7764 }
7765
7766 /*
7767  *  The following code was generated by perl_keyword.pl.
7768  */
7769
7770 I32
7771 Perl_keyword (pTHX_ const char *name, I32 len, bool all_keywords)
7772 {
7773     dVAR;
7774
7775     PERL_ARGS_ASSERT_KEYWORD;
7776
7777   switch (len)
7778   {
7779     case 1: /* 5 tokens of length 1 */
7780       switch (name[0])
7781       {
7782         case 'm':
7783           {                                       /* m          */
7784             return KEY_m;
7785           }
7786
7787         case 'q':
7788           {                                       /* q          */
7789             return KEY_q;
7790           }
7791
7792         case 's':
7793           {                                       /* s          */
7794             return KEY_s;
7795           }
7796
7797         case 'x':
7798           {                                       /* x          */
7799             return -KEY_x;
7800           }
7801
7802         case 'y':
7803           {                                       /* y          */
7804             return KEY_y;
7805           }
7806
7807         default:
7808           goto unknown;
7809       }
7810
7811     case 2: /* 18 tokens of length 2 */
7812       switch (name[0])
7813       {
7814         case 'd':
7815           if (name[1] == 'o')
7816           {                                       /* do         */
7817             return KEY_do;
7818           }
7819
7820           goto unknown;
7821
7822         case 'e':
7823           if (name[1] == 'q')
7824           {                                       /* eq         */
7825             return -KEY_eq;
7826           }
7827
7828           goto unknown;
7829
7830         case 'g':
7831           switch (name[1])
7832           {
7833             case 'e':
7834               {                                   /* ge         */
7835                 return -KEY_ge;
7836               }
7837
7838             case 't':
7839               {                                   /* gt         */
7840                 return -KEY_gt;
7841               }
7842
7843             default:
7844               goto unknown;
7845           }
7846
7847         case 'i':
7848           if (name[1] == 'f')
7849           {                                       /* if         */
7850             return KEY_if;
7851           }
7852
7853           goto unknown;
7854
7855         case 'l':
7856           switch (name[1])
7857           {
7858             case 'c':
7859               {                                   /* lc         */
7860                 return -KEY_lc;
7861               }
7862
7863             case 'e':
7864               {                                   /* le         */
7865                 return -KEY_le;
7866               }
7867
7868             case 't':
7869               {                                   /* lt         */
7870                 return -KEY_lt;
7871               }
7872
7873             default:
7874               goto unknown;
7875           }
7876
7877         case 'm':
7878           if (name[1] == 'y')
7879           {                                       /* my         */
7880             return KEY_my;
7881           }
7882
7883           goto unknown;
7884
7885         case 'n':
7886           switch (name[1])
7887           {
7888             case 'e':
7889               {                                   /* ne         */
7890                 return -KEY_ne;
7891               }
7892
7893             case 'o':
7894               {                                   /* no         */
7895                 return KEY_no;
7896               }
7897
7898             default:
7899               goto unknown;
7900           }
7901
7902         case 'o':
7903           if (name[1] == 'r')
7904           {                                       /* or         */
7905             return -KEY_or;
7906           }
7907
7908           goto unknown;
7909
7910         case 'q':
7911           switch (name[1])
7912           {
7913             case 'q':
7914               {                                   /* qq         */
7915                 return KEY_qq;
7916               }
7917
7918             case 'r':
7919               {                                   /* qr         */
7920                 return KEY_qr;
7921               }
7922
7923             case 'w':
7924               {                                   /* qw         */
7925                 return KEY_qw;
7926               }
7927
7928             case 'x':
7929               {                                   /* qx         */
7930                 return KEY_qx;
7931               }
7932
7933             default:
7934               goto unknown;
7935           }
7936
7937         case 't':
7938           if (name[1] == 'r')
7939           {                                       /* tr         */
7940             return KEY_tr;
7941           }
7942
7943           goto unknown;
7944
7945         case 'u':
7946           if (name[1] == 'c')
7947           {                                       /* uc         */
7948             return -KEY_uc;
7949           }
7950
7951           goto unknown;
7952
7953         default:
7954           goto unknown;
7955       }
7956
7957     case 3: /* 29 tokens of length 3 */
7958       switch (name[0])
7959       {
7960         case 'E':
7961           if (name[1] == 'N' &&
7962               name[2] == 'D')
7963           {                                       /* END        */
7964             return KEY_END;
7965           }
7966
7967           goto unknown;
7968
7969         case 'a':
7970           switch (name[1])
7971           {
7972             case 'b':
7973               if (name[2] == 's')
7974               {                                   /* abs        */
7975                 return -KEY_abs;
7976               }
7977
7978               goto unknown;
7979
7980             case 'n':
7981               if (name[2] == 'd')
7982               {                                   /* and        */
7983                 return -KEY_and;
7984               }
7985
7986               goto unknown;
7987
7988             default:
7989               goto unknown;
7990           }
7991
7992         case 'c':
7993           switch (name[1])
7994           {
7995             case 'h':
7996               if (name[2] == 'r')
7997               {                                   /* chr        */
7998                 return -KEY_chr;
7999               }
8000
8001               goto unknown;
8002
8003             case 'm':
8004               if (name[2] == 'p')
8005               {                                   /* cmp        */
8006                 return -KEY_cmp;
8007               }
8008
8009               goto unknown;
8010
8011             case 'o':
8012               if (name[2] == 's')
8013               {                                   /* cos        */
8014                 return -KEY_cos;
8015               }
8016
8017               goto unknown;
8018
8019             default:
8020               goto unknown;
8021           }
8022
8023         case 'd':
8024           if (name[1] == 'i' &&
8025               name[2] == 'e')
8026           {                                       /* die        */
8027             return -KEY_die;
8028           }
8029
8030           goto unknown;
8031
8032         case 'e':
8033           switch (name[1])
8034           {
8035             case 'o':
8036               if (name[2] == 'f')
8037               {                                   /* eof        */
8038                 return -KEY_eof;
8039               }
8040
8041               goto unknown;
8042
8043             case 'x':
8044               if (name[2] == 'p')
8045               {                                   /* exp        */
8046                 return -KEY_exp;
8047               }
8048
8049               goto unknown;
8050
8051             default:
8052               goto unknown;
8053           }
8054
8055         case 'f':
8056           if (name[1] == 'o' &&
8057               name[2] == 'r')
8058           {                                       /* for        */
8059             return KEY_for;
8060           }
8061
8062           goto unknown;
8063
8064         case 'h':
8065           if (name[1] == 'e' &&
8066               name[2] == 'x')
8067           {                                       /* hex        */
8068             return -KEY_hex;
8069           }
8070
8071           goto unknown;
8072
8073         case 'i':
8074           if (name[1] == 'n' &&
8075               name[2] == 't')
8076           {                                       /* int        */
8077             return -KEY_int;
8078           }
8079
8080           goto unknown;
8081
8082         case 'l':
8083           if (name[1] == 'o' &&
8084               name[2] == 'g')
8085           {                                       /* log        */
8086             return -KEY_log;
8087           }
8088
8089           goto unknown;
8090
8091         case 'm':
8092           if (name[1] == 'a' &&
8093               name[2] == 'p')
8094           {                                       /* map        */
8095             return KEY_map;
8096           }
8097
8098           goto unknown;
8099
8100         case 'n':
8101           if (name[1] == 'o' &&
8102               name[2] == 't')
8103           {                                       /* not        */
8104             return -KEY_not;
8105           }
8106
8107           goto unknown;
8108
8109         case 'o':
8110           switch (name[1])
8111           {
8112             case 'c':
8113               if (name[2] == 't')
8114               {                                   /* oct        */
8115                 return -KEY_oct;
8116               }
8117
8118               goto unknown;
8119
8120             case 'r':
8121               if (name[2] == 'd')
8122               {                                   /* ord        */
8123                 return -KEY_ord;
8124               }
8125
8126               goto unknown;
8127
8128             case 'u':
8129               if (name[2] == 'r')
8130               {                                   /* our        */
8131                 return KEY_our;
8132               }
8133
8134               goto unknown;
8135
8136             default:
8137               goto unknown;
8138           }
8139
8140         case 'p':
8141           if (name[1] == 'o')
8142           {
8143             switch (name[2])
8144             {
8145               case 'p':
8146                 {                                 /* pop        */
8147                   return -KEY_pop;
8148                 }
8149
8150               case 's':
8151                 {                                 /* pos        */
8152                   return KEY_pos;
8153                 }
8154
8155               default:
8156                 goto unknown;
8157             }
8158           }
8159
8160           goto unknown;
8161
8162         case 'r':
8163           if (name[1] == 'e' &&
8164               name[2] == 'f')
8165           {                                       /* ref        */
8166             return -KEY_ref;
8167           }
8168
8169           goto unknown;
8170
8171         case 's':
8172           switch (name[1])
8173           {
8174             case 'a':
8175               if (name[2] == 'y')
8176               {                                   /* say        */
8177                 return (all_keywords || FEATURE_IS_ENABLED("say") ? KEY_say : 0);
8178               }
8179
8180               goto unknown;
8181
8182             case 'i':
8183               if (name[2] == 'n')
8184               {                                   /* sin        */
8185                 return -KEY_sin;
8186               }
8187
8188               goto unknown;
8189
8190             case 'u':
8191               if (name[2] == 'b')
8192               {                                   /* sub        */
8193                 return KEY_sub;
8194               }
8195
8196               goto unknown;
8197
8198             default:
8199               goto unknown;
8200           }
8201
8202         case 't':
8203           if (name[1] == 'i' &&
8204               name[2] == 'e')
8205           {                                       /* tie        */
8206             return KEY_tie;
8207           }
8208
8209           goto unknown;
8210
8211         case 'u':
8212           if (name[1] == 's' &&
8213               name[2] == 'e')
8214           {                                       /* use        */
8215             return KEY_use;
8216           }
8217
8218           goto unknown;
8219
8220         case 'v':
8221           if (name[1] == 'e' &&
8222               name[2] == 'c')
8223           {                                       /* vec        */
8224             return -KEY_vec;
8225           }
8226
8227           goto unknown;
8228
8229         case 'x':
8230           if (name[1] == 'o' &&
8231               name[2] == 'r')
8232           {                                       /* xor        */
8233             return -KEY_xor;
8234           }
8235
8236           goto unknown;
8237
8238         default:
8239           goto unknown;
8240       }
8241
8242     case 4: /* 41 tokens of length 4 */
8243       switch (name[0])
8244       {
8245         case 'C':
8246           if (name[1] == 'O' &&
8247               name[2] == 'R' &&
8248               name[3] == 'E')
8249           {                                       /* CORE       */
8250             return -KEY_CORE;
8251           }
8252
8253           goto unknown;
8254
8255         case 'I':
8256           if (name[1] == 'N' &&
8257               name[2] == 'I' &&
8258               name[3] == 'T')
8259           {                                       /* INIT       */
8260             return KEY_INIT;
8261           }
8262
8263           goto unknown;
8264
8265         case 'b':
8266           if (name[1] == 'i' &&
8267               name[2] == 'n' &&
8268               name[3] == 'd')
8269           {                                       /* bind       */
8270             return -KEY_bind;
8271           }
8272
8273           goto unknown;
8274
8275         case 'c':
8276           if (name[1] == 'h' &&
8277               name[2] == 'o' &&
8278               name[3] == 'p')
8279           {                                       /* chop       */
8280             return -KEY_chop;
8281           }
8282
8283           goto unknown;
8284
8285         case 'd':
8286           if (name[1] == 'u' &&
8287               name[2] == 'm' &&
8288               name[3] == 'p')
8289           {                                       /* dump       */
8290             return -KEY_dump;
8291           }
8292
8293           goto unknown;
8294
8295         case 'e':
8296           switch (name[1])
8297           {
8298             case 'a':
8299               if (name[2] == 'c' &&
8300                   name[3] == 'h')
8301               {                                   /* each       */
8302                 return -KEY_each;
8303               }
8304
8305               goto unknown;
8306
8307             case 'l':
8308               if (name[2] == 's' &&
8309                   name[3] == 'e')
8310               {                                   /* else       */
8311                 return KEY_else;
8312               }
8313
8314               goto unknown;
8315
8316             case 'v':
8317               if (name[2] == 'a' &&
8318                   name[3] == 'l')
8319               {                                   /* eval       */
8320                 return KEY_eval;
8321               }
8322
8323               goto unknown;
8324
8325             case 'x':
8326               switch (name[2])
8327               {
8328                 case 'e':
8329                   if (name[3] == 'c')
8330                   {                               /* exec       */
8331                     return -KEY_exec;
8332                   }
8333
8334                   goto unknown;
8335
8336                 case 'i':
8337                   if (name[3] == 't')
8338                   {                               /* exit       */
8339                     return -KEY_exit;
8340                   }
8341
8342                   goto unknown;
8343
8344                 default:
8345                   goto unknown;
8346               }
8347
8348             default:
8349               goto unknown;
8350           }
8351
8352         case 'f':
8353           if (name[1] == 'o' &&
8354               name[2] == 'r' &&
8355               name[3] == 'k')
8356           {                                       /* fork       */
8357             return -KEY_fork;
8358           }
8359
8360           goto unknown;
8361
8362         case 'g':
8363           switch (name[1])
8364           {
8365             case 'e':
8366               if (name[2] == 't' &&
8367                   name[3] == 'c')
8368               {                                   /* getc       */
8369                 return -KEY_getc;
8370               }
8371
8372               goto unknown;
8373
8374             case 'l':
8375               if (name[2] == 'o' &&
8376                   name[3] == 'b')
8377               {                                   /* glob       */
8378                 return KEY_glob;
8379               }
8380
8381               goto unknown;
8382
8383             case 'o':
8384               if (name[2] == 't' &&
8385                   name[3] == 'o')
8386               {                                   /* goto       */
8387                 return KEY_goto;
8388               }
8389
8390               goto unknown;
8391
8392             case 'r':
8393               if (name[2] == 'e' &&
8394                   name[3] == 'p')
8395               {                                   /* grep       */
8396                 return KEY_grep;
8397               }
8398
8399               goto unknown;
8400
8401             default:
8402               goto unknown;
8403           }
8404
8405         case 'j':
8406           if (name[1] == 'o' &&
8407               name[2] == 'i' &&
8408               name[3] == 'n')
8409           {                                       /* join       */
8410             return -KEY_join;
8411           }
8412
8413           goto unknown;
8414
8415         case 'k':
8416           switch (name[1])
8417           {
8418             case 'e':
8419               if (name[2] == 'y' &&
8420                   name[3] == 's')
8421               {                                   /* keys       */
8422                 return -KEY_keys;
8423               }
8424
8425               goto unknown;
8426
8427             case 'i':
8428               if (name[2] == 'l' &&
8429                   name[3] == 'l')
8430               {                                   /* kill       */
8431                 return -KEY_kill;
8432               }
8433
8434               goto unknown;
8435
8436             default:
8437               goto unknown;
8438           }
8439
8440         case 'l':
8441           switch (name[1])
8442           {
8443             case 'a':
8444               if (name[2] == 's' &&
8445                   name[3] == 't')
8446               {                                   /* last       */
8447                 return KEY_last;
8448               }
8449
8450               goto unknown;
8451
8452             case 'i':
8453               if (name[2] == 'n' &&
8454                   name[3] == 'k')
8455               {                                   /* link       */
8456                 return -KEY_link;
8457               }
8458
8459               goto unknown;
8460
8461             case 'o':
8462               if (name[2] == 'c' &&
8463                   name[3] == 'k')
8464               {                                   /* lock       */
8465                 return -KEY_lock;
8466               }
8467
8468               goto unknown;
8469
8470             default:
8471               goto unknown;
8472           }
8473
8474         case 'n':
8475           if (name[1] == 'e' &&
8476               name[2] == 'x' &&
8477               name[3] == 't')
8478           {                                       /* next       */
8479             return KEY_next;
8480           }
8481
8482           goto unknown;
8483
8484         case 'o':
8485           if (name[1] == 'p' &&
8486               name[2] == 'e' &&
8487               name[3] == 'n')
8488           {                                       /* open       */
8489             return -KEY_open;
8490           }
8491
8492           goto unknown;
8493
8494         case 'p':
8495           switch (name[1])
8496           {
8497             case 'a':
8498               if (name[2] == 'c' &&
8499                   name[3] == 'k')
8500               {                                   /* pack       */
8501                 return -KEY_pack;
8502               }
8503
8504               goto unknown;
8505
8506             case 'i':
8507               if (name[2] == 'p' &&
8508                   name[3] == 'e')
8509               {                                   /* pipe       */
8510                 return -KEY_pipe;
8511               }
8512
8513               goto unknown;
8514
8515             case 'u':
8516               if (name[2] == 's' &&
8517                   name[3] == 'h')
8518               {                                   /* push       */
8519                 return -KEY_push;
8520               }
8521
8522               goto unknown;
8523
8524             default:
8525               goto unknown;
8526           }
8527
8528         case 'r':
8529           switch (name[1])
8530           {
8531             case 'a':
8532               if (name[2] == 'n' &&
8533                   name[3] == 'd')
8534               {                                   /* rand       */
8535                 return -KEY_rand;
8536               }
8537
8538               goto unknown;
8539
8540             case 'e':
8541               switch (name[2])
8542               {
8543                 case 'a':
8544                   if (name[3] == 'd')
8545                   {                               /* read       */
8546                     return -KEY_read;
8547                   }
8548
8549                   goto unknown;
8550
8551                 case 'c':
8552                   if (name[3] == 'v')
8553                   {                               /* recv       */
8554                     return -KEY_recv;
8555                   }
8556
8557                   goto unknown;
8558
8559                 case 'd':
8560                   if (name[3] == 'o')
8561                   {                               /* redo       */
8562                     return KEY_redo;
8563                   }
8564
8565                   goto unknown;
8566
8567                 default:
8568                   goto unknown;
8569               }
8570
8571             default:
8572               goto unknown;
8573           }
8574
8575         case 's':
8576           switch (name[1])
8577           {
8578             case 'e':
8579               switch (name[2])
8580               {
8581                 case 'e':
8582                   if (name[3] == 'k')
8583                   {                               /* seek       */
8584                     return -KEY_seek;
8585                   }
8586
8587                   goto unknown;
8588
8589                 case 'n':
8590                   if (name[3] == 'd')
8591                   {                               /* send       */
8592                     return -KEY_send;
8593                   }
8594
8595                   goto unknown;
8596
8597                 default:
8598                   goto unknown;
8599               }
8600
8601             case 'o':
8602               if (name[2] == 'r' &&
8603                   name[3] == 't')
8604               {                                   /* sort       */
8605                 return KEY_sort;
8606               }
8607
8608               goto unknown;
8609
8610             case 'q':
8611               if (name[2] == 'r' &&
8612                   name[3] == 't')
8613               {                                   /* sqrt       */
8614                 return -KEY_sqrt;
8615               }
8616
8617               goto unknown;
8618
8619             case 't':
8620               if (name[2] == 'a' &&
8621                   name[3] == 't')
8622               {                                   /* stat       */
8623                 return -KEY_stat;
8624               }
8625
8626               goto unknown;
8627
8628             default:
8629               goto unknown;
8630           }
8631
8632         case 't':
8633           switch (name[1])
8634           {
8635             case 'e':
8636               if (name[2] == 'l' &&
8637                   name[3] == 'l')
8638               {                                   /* tell       */
8639                 return -KEY_tell;
8640               }
8641
8642               goto unknown;
8643
8644             case 'i':
8645               switch (name[2])
8646               {
8647                 case 'e':
8648                   if (name[3] == 'd')
8649                   {                               /* tied       */
8650                     return KEY_tied;
8651                   }
8652
8653                   goto unknown;
8654
8655                 case 'm':
8656                   if (name[3] == 'e')
8657                   {                               /* time       */
8658                     return -KEY_time;
8659                   }
8660
8661                   goto unknown;
8662
8663                 default:
8664                   goto unknown;
8665               }
8666
8667             default:
8668               goto unknown;
8669           }
8670
8671         case 'w':
8672           switch (name[1])
8673           {
8674             case 'a':
8675               switch (name[2])
8676               {
8677                 case 'i':
8678                   if (name[3] == 't')
8679                   {                               /* wait       */
8680                     return -KEY_wait;
8681                   }
8682
8683                   goto unknown;
8684
8685                 case 'r':
8686                   if (name[3] == 'n')
8687                   {                               /* warn       */
8688                     return -KEY_warn;
8689                   }
8690
8691                   goto unknown;
8692
8693                 default:
8694                   goto unknown;
8695               }
8696
8697             case 'h':
8698               if (name[2] == 'e' &&
8699                   name[3] == 'n')
8700               {                                   /* when       */
8701                 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_when : 0);
8702               }
8703
8704               goto unknown;
8705
8706             default:
8707               goto unknown;
8708           }
8709
8710         default:
8711           goto unknown;
8712       }
8713
8714     case 5: /* 39 tokens of length 5 */
8715       switch (name[0])
8716       {
8717         case 'B':
8718           if (name[1] == 'E' &&
8719               name[2] == 'G' &&
8720               name[3] == 'I' &&
8721               name[4] == 'N')
8722           {                                       /* BEGIN      */
8723             return KEY_BEGIN;
8724           }
8725
8726           goto unknown;
8727
8728         case 'C':
8729           if (name[1] == 'H' &&
8730               name[2] == 'E' &&
8731               name[3] == 'C' &&
8732               name[4] == 'K')
8733           {                                       /* CHECK      */
8734             return KEY_CHECK;
8735           }
8736
8737           goto unknown;
8738
8739         case 'a':
8740           switch (name[1])
8741           {
8742             case 'l':
8743               if (name[2] == 'a' &&
8744                   name[3] == 'r' &&
8745                   name[4] == 'm')
8746               {                                   /* alarm      */
8747                 return -KEY_alarm;
8748               }
8749
8750               goto unknown;
8751
8752             case 't':
8753               if (name[2] == 'a' &&
8754                   name[3] == 'n' &&
8755                   name[4] == '2')
8756               {                                   /* atan2      */
8757                 return -KEY_atan2;
8758               }
8759
8760               goto unknown;
8761
8762             default:
8763               goto unknown;
8764           }
8765
8766         case 'b':
8767           switch (name[1])
8768           {
8769             case 'l':
8770               if (name[2] == 'e' &&
8771                   name[3] == 's' &&
8772                   name[4] == 's')
8773               {                                   /* bless      */
8774                 return -KEY_bless;
8775               }
8776
8777               goto unknown;
8778
8779             case 'r':
8780               if (name[2] == 'e' &&
8781                   name[3] == 'a' &&
8782                   name[4] == 'k')
8783               {                                   /* break      */
8784                 return (all_keywords || FEATURE_IS_ENABLED("switch") ? -KEY_break : 0);
8785               }
8786
8787               goto unknown;
8788
8789             default:
8790               goto unknown;
8791           }
8792
8793         case 'c':
8794           switch (name[1])
8795           {
8796             case 'h':
8797               switch (name[2])
8798               {
8799                 case 'd':
8800                   if (name[3] == 'i' &&
8801                       name[4] == 'r')
8802                   {                               /* chdir      */
8803                     return -KEY_chdir;
8804                   }
8805
8806                   goto unknown;
8807
8808                 case 'm':
8809                   if (name[3] == 'o' &&
8810                       name[4] == 'd')
8811                   {                               /* chmod      */
8812                     return -KEY_chmod;
8813                   }
8814
8815                   goto unknown;
8816
8817                 case 'o':
8818                   switch (name[3])
8819                   {
8820                     case 'm':
8821                       if (name[4] == 'p')
8822                       {                           /* chomp      */
8823                         return -KEY_chomp;
8824                       }
8825
8826                       goto unknown;
8827
8828                     case 'w':
8829                       if (name[4] == 'n')
8830                       {                           /* chown      */
8831                         return -KEY_chown;
8832                       }
8833
8834                       goto unknown;
8835
8836                     default:
8837                       goto unknown;
8838                   }
8839
8840                 default:
8841                   goto unknown;
8842               }
8843
8844             case 'l':
8845               if (name[2] == 'o' &&
8846                   name[3] == 's' &&
8847                   name[4] == 'e')
8848               {                                   /* close      */
8849                 return -KEY_close;
8850               }
8851
8852               goto unknown;
8853
8854             case 'r':
8855               if (name[2] == 'y' &&
8856                   name[3] == 'p' &&
8857                   name[4] == 't')
8858               {                                   /* crypt      */
8859                 return -KEY_crypt;
8860               }
8861
8862               goto unknown;
8863
8864             default:
8865               goto unknown;
8866           }
8867
8868         case 'e':
8869           if (name[1] == 'l' &&
8870               name[2] == 's' &&
8871               name[3] == 'i' &&
8872               name[4] == 'f')
8873           {                                       /* elsif      */
8874             return KEY_elsif;
8875           }
8876
8877           goto unknown;
8878
8879         case 'f':
8880           switch (name[1])
8881           {
8882             case 'c':
8883               if (name[2] == 'n' &&
8884                   name[3] == 't' &&
8885                   name[4] == 'l')
8886               {                                   /* fcntl      */
8887                 return -KEY_fcntl;
8888               }
8889
8890               goto unknown;
8891
8892             case 'l':
8893               if (name[2] == 'o' &&
8894                   name[3] == 'c' &&
8895                   name[4] == 'k')
8896               {                                   /* flock      */
8897                 return -KEY_flock;
8898               }
8899
8900               goto unknown;
8901
8902             default:
8903               goto unknown;
8904           }
8905
8906         case 'g':
8907           if (name[1] == 'i' &&
8908               name[2] == 'v' &&
8909               name[3] == 'e' &&
8910               name[4] == 'n')
8911           {                                       /* given      */
8912             return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_given : 0);
8913           }
8914
8915           goto unknown;
8916
8917         case 'i':
8918           switch (name[1])
8919           {
8920             case 'n':
8921               if (name[2] == 'd' &&
8922                   name[3] == 'e' &&
8923                   name[4] == 'x')
8924               {                                   /* index      */
8925                 return -KEY_index;
8926               }
8927
8928               goto unknown;
8929
8930             case 'o':
8931               if (name[2] == 'c' &&
8932                   name[3] == 't' &&
8933                   name[4] == 'l')
8934               {                                   /* ioctl      */
8935                 return -KEY_ioctl;
8936               }
8937
8938               goto unknown;
8939
8940             default:
8941               goto unknown;
8942           }
8943
8944         case 'l':
8945           switch (name[1])
8946           {
8947             case 'o':
8948               if (name[2] == 'c' &&
8949                   name[3] == 'a' &&
8950                   name[4] == 'l')
8951               {                                   /* local      */
8952                 return KEY_local;
8953               }
8954
8955               goto unknown;
8956
8957             case 's':
8958               if (name[2] == 't' &&
8959                   name[3] == 'a' &&
8960                   name[4] == 't')
8961               {                                   /* lstat      */
8962                 return -KEY_lstat;
8963               }
8964
8965               goto unknown;
8966
8967             default:
8968               goto unknown;
8969           }
8970
8971         case 'm':
8972           if (name[1] == 'k' &&
8973               name[2] == 'd' &&
8974               name[3] == 'i' &&
8975               name[4] == 'r')
8976           {                                       /* mkdir      */
8977             return -KEY_mkdir;
8978           }
8979
8980           goto unknown;
8981
8982         case 'p':
8983           if (name[1] == 'r' &&
8984               name[2] == 'i' &&
8985               name[3] == 'n' &&
8986               name[4] == 't')
8987           {                                       /* print      */
8988             return KEY_print;
8989           }
8990
8991           goto unknown;
8992
8993         case 'r':
8994           switch (name[1])
8995           {
8996             case 'e':
8997               if (name[2] == 's' &&
8998                   name[3] == 'e' &&
8999                   name[4] == 't')
9000               {                                   /* reset      */
9001                 return -KEY_reset;
9002               }
9003
9004               goto unknown;
9005
9006             case 'm':
9007               if (name[2] == 'd' &&
9008                   name[3] == 'i' &&
9009                   name[4] == 'r')
9010               {                                   /* rmdir      */
9011                 return -KEY_rmdir;
9012               }
9013
9014               goto unknown;
9015
9016             default:
9017               goto unknown;
9018           }
9019
9020         case 's':
9021           switch (name[1])
9022           {
9023             case 'e':
9024               if (name[2] == 'm' &&
9025                   name[3] == 'o' &&
9026                   name[4] == 'p')
9027               {                                   /* semop      */
9028                 return -KEY_semop;
9029               }
9030
9031               goto unknown;
9032
9033             case 'h':
9034               if (name[2] == 'i' &&
9035                   name[3] == 'f' &&
9036                   name[4] == 't')
9037               {                                   /* shift      */
9038                 return -KEY_shift;
9039               }
9040
9041               goto unknown;
9042
9043             case 'l':
9044               if (name[2] == 'e' &&
9045                   name[3] == 'e' &&
9046                   name[4] == 'p')
9047               {                                   /* sleep      */
9048                 return -KEY_sleep;
9049               }
9050
9051               goto unknown;
9052
9053             case 'p':
9054               if (name[2] == 'l' &&
9055                   name[3] == 'i' &&
9056                   name[4] == 't')
9057               {                                   /* split      */
9058                 return KEY_split;
9059               }
9060
9061               goto unknown;
9062
9063             case 'r':
9064               if (name[2] == 'a' &&
9065                   name[3] == 'n' &&
9066                   name[4] == 'd')
9067               {                                   /* srand      */
9068                 return -KEY_srand;
9069               }
9070
9071               goto unknown;
9072
9073             case 't':
9074               switch (name[2])
9075               {
9076                 case 'a':
9077                   if (name[3] == 't' &&
9078                       name[4] == 'e')
9079                   {                               /* state      */
9080                     return (all_keywords || FEATURE_IS_ENABLED("state") ? KEY_state : 0);
9081                   }
9082
9083                   goto unknown;
9084
9085                 case 'u':
9086                   if (name[3] == 'd' &&
9087                       name[4] == 'y')
9088                   {                               /* study      */
9089                     return KEY_study;
9090                   }
9091
9092                   goto unknown;
9093
9094                 default:
9095                   goto unknown;
9096               }
9097
9098             default:
9099               goto unknown;
9100           }
9101
9102         case 't':
9103           if (name[1] == 'i' &&
9104               name[2] == 'm' &&
9105               name[3] == 'e' &&
9106               name[4] == 's')
9107           {                                       /* times      */
9108             return -KEY_times;
9109           }
9110
9111           goto unknown;
9112
9113         case 'u':
9114           switch (name[1])
9115           {
9116             case 'm':
9117               if (name[2] == 'a' &&
9118                   name[3] == 's' &&
9119                   name[4] == 'k')
9120               {                                   /* umask      */
9121                 return -KEY_umask;
9122               }
9123
9124               goto unknown;
9125
9126             case 'n':
9127               switch (name[2])
9128               {
9129                 case 'd':
9130                   if (name[3] == 'e' &&
9131                       name[4] == 'f')
9132                   {                               /* undef      */
9133                     return KEY_undef;
9134                   }
9135
9136                   goto unknown;
9137
9138                 case 't':
9139                   if (name[3] == 'i')
9140                   {
9141                     switch (name[4])
9142                     {
9143                       case 'e':
9144                         {                         /* untie      */
9145                           return KEY_untie;
9146                         }
9147
9148                       case 'l':
9149                         {                         /* until      */
9150                           return KEY_until;
9151                         }
9152
9153                       default:
9154                         goto unknown;
9155                     }
9156                   }
9157
9158                   goto unknown;
9159
9160                 default:
9161                   goto unknown;
9162               }
9163
9164             case 't':
9165               if (name[2] == 'i' &&
9166                   name[3] == 'm' &&
9167                   name[4] == 'e')
9168               {                                   /* utime      */
9169                 return -KEY_utime;
9170               }
9171
9172               goto unknown;
9173
9174             default:
9175               goto unknown;
9176           }
9177
9178         case 'w':
9179           switch (name[1])
9180           {
9181             case 'h':
9182               if (name[2] == 'i' &&
9183                   name[3] == 'l' &&
9184                   name[4] == 'e')
9185               {                                   /* while      */
9186                 return KEY_while;
9187               }
9188
9189               goto unknown;
9190
9191             case 'r':
9192               if (name[2] == 'i' &&
9193                   name[3] == 't' &&
9194                   name[4] == 'e')
9195               {                                   /* write      */
9196                 return -KEY_write;
9197               }
9198
9199               goto unknown;
9200
9201             default:
9202               goto unknown;
9203           }
9204
9205         default:
9206           goto unknown;
9207       }
9208
9209     case 6: /* 33 tokens of length 6 */
9210       switch (name[0])
9211       {
9212         case 'a':
9213           if (name[1] == 'c' &&
9214               name[2] == 'c' &&
9215               name[3] == 'e' &&
9216               name[4] == 'p' &&
9217               name[5] == 't')
9218           {                                       /* accept     */
9219             return -KEY_accept;
9220           }
9221
9222           goto unknown;
9223
9224         case 'c':
9225           switch (name[1])
9226           {
9227             case 'a':
9228               if (name[2] == 'l' &&
9229                   name[3] == 'l' &&
9230                   name[4] == 'e' &&
9231                   name[5] == 'r')
9232               {                                   /* caller     */
9233                 return -KEY_caller;
9234               }
9235
9236               goto unknown;
9237
9238             case 'h':
9239               if (name[2] == 'r' &&
9240                   name[3] == 'o' &&
9241                   name[4] == 'o' &&
9242                   name[5] == 't')
9243               {                                   /* chroot     */
9244                 return -KEY_chroot;
9245               }
9246
9247               goto unknown;
9248
9249             default:
9250               goto unknown;
9251           }
9252
9253         case 'd':
9254           if (name[1] == 'e' &&
9255               name[2] == 'l' &&
9256               name[3] == 'e' &&
9257               name[4] == 't' &&
9258               name[5] == 'e')
9259           {                                       /* delete     */
9260             return KEY_delete;
9261           }
9262
9263           goto unknown;
9264
9265         case 'e':
9266           switch (name[1])
9267           {
9268             case 'l':
9269               if (name[2] == 's' &&
9270                   name[3] == 'e' &&
9271                   name[4] == 'i' &&
9272                   name[5] == 'f')
9273               {                                   /* elseif     */
9274                   Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif");
9275               }
9276
9277               goto unknown;
9278
9279             case 'x':
9280               if (name[2] == 'i' &&
9281                   name[3] == 's' &&
9282                   name[4] == 't' &&
9283                   name[5] == 's')
9284               {                                   /* exists     */
9285                 return KEY_exists;
9286               }
9287
9288               goto unknown;
9289
9290             default:
9291               goto unknown;
9292           }
9293
9294         case 'f':
9295           switch (name[1])
9296           {
9297             case 'i':
9298               if (name[2] == 'l' &&
9299                   name[3] == 'e' &&
9300                   name[4] == 'n' &&
9301                   name[5] == 'o')
9302               {                                   /* fileno     */
9303                 return -KEY_fileno;
9304               }
9305
9306               goto unknown;
9307
9308             case 'o':
9309               if (name[2] == 'r' &&
9310                   name[3] == 'm' &&
9311                   name[4] == 'a' &&
9312                   name[5] == 't')
9313               {                                   /* format     */
9314                 return KEY_format;
9315               }
9316
9317               goto unknown;
9318
9319             default:
9320               goto unknown;
9321           }
9322
9323         case 'g':
9324           if (name[1] == 'm' &&
9325               name[2] == 't' &&
9326               name[3] == 'i' &&
9327               name[4] == 'm' &&
9328               name[5] == 'e')
9329           {                                       /* gmtime     */
9330             return -KEY_gmtime;
9331           }
9332
9333           goto unknown;
9334
9335         case 'l':
9336           switch (name[1])
9337           {
9338             case 'e':
9339               if (name[2] == 'n' &&
9340                   name[3] == 'g' &&
9341                   name[4] == 't' &&
9342                   name[5] == 'h')
9343               {                                   /* length     */
9344                 return -KEY_length;
9345               }
9346
9347               goto unknown;
9348
9349             case 'i':
9350               if (name[2] == 's' &&
9351                   name[3] == 't' &&
9352                   name[4] == 'e' &&
9353                   name[5] == 'n')
9354               {                                   /* listen     */
9355                 return -KEY_listen;
9356               }
9357
9358               goto unknown;
9359
9360             default:
9361               goto unknown;
9362           }
9363
9364         case 'm':
9365           if (name[1] == 's' &&
9366               name[2] == 'g')
9367           {
9368             switch (name[3])
9369             {
9370               case 'c':
9371                 if (name[4] == 't' &&
9372                     name[5] == 'l')
9373                 {                                 /* msgctl     */
9374                   return -KEY_msgctl;
9375                 }
9376
9377                 goto unknown;
9378
9379               case 'g':
9380                 if (name[4] == 'e' &&
9381                     name[5] == 't')
9382                 {                                 /* msgget     */
9383                   return -KEY_msgget;
9384                 }
9385
9386                 goto unknown;
9387
9388               case 'r':
9389                 if (name[4] == 'c' &&
9390                     name[5] == 'v')
9391                 {                                 /* msgrcv     */
9392                   return -KEY_msgrcv;
9393                 }
9394
9395                 goto unknown;
9396
9397               case 's':
9398                 if (name[4] == 'n' &&
9399                     name[5] == 'd')
9400                 {                                 /* msgsnd     */
9401                   return -KEY_msgsnd;
9402                 }
9403
9404                 goto unknown;
9405
9406               default:
9407                 goto unknown;
9408             }
9409           }
9410
9411           goto unknown;
9412
9413         case 'p':
9414           if (name[1] == 'r' &&
9415               name[2] == 'i' &&
9416               name[3] == 'n' &&
9417               name[4] == 't' &&
9418               name[5] == 'f')
9419           {                                       /* printf     */
9420             return KEY_printf;
9421           }
9422
9423           goto unknown;
9424
9425         case 'r':
9426           switch (name[1])
9427           {
9428             case 'e':
9429               switch (name[2])
9430               {
9431                 case 'n':
9432                   if (name[3] == 'a' &&
9433                       name[4] == 'm' &&
9434                       name[5] == 'e')
9435                   {                               /* rename     */
9436                     return -KEY_rename;
9437                   }
9438
9439                   goto unknown;
9440
9441                 case 't':
9442                   if (name[3] == 'u' &&
9443                       name[4] == 'r' &&
9444                       name[5] == 'n')
9445                   {                               /* return     */
9446                     return KEY_return;
9447                   }
9448
9449                   goto unknown;
9450
9451                 default:
9452                   goto unknown;
9453               }
9454
9455             case 'i':
9456               if (name[2] == 'n' &&
9457                   name[3] == 'd' &&
9458                   name[4] == 'e' &&
9459                   name[5] == 'x')
9460               {                                   /* rindex     */
9461                 return -KEY_rindex;
9462               }
9463
9464               goto unknown;
9465
9466             default:
9467               goto unknown;
9468           }
9469
9470         case 's':
9471           switch (name[1])
9472           {
9473             case 'c':
9474               if (name[2] == 'a' &&
9475                   name[3] == 'l' &&
9476                   name[4] == 'a' &&
9477                   name[5] == 'r')
9478               {                                   /* scalar     */
9479                 return KEY_scalar;
9480               }
9481
9482               goto unknown;
9483
9484             case 'e':
9485               switch (name[2])
9486               {
9487                 case 'l':
9488                   if (name[3] == 'e' &&
9489                       name[4] == 'c' &&
9490                       name[5] == 't')
9491                   {                               /* select     */
9492                     return -KEY_select;
9493                   }
9494
9495                   goto unknown;
9496
9497                 case 'm':
9498                   switch (name[3])
9499                   {
9500                     case 'c':
9501                       if (name[4] == 't' &&
9502                           name[5] == 'l')
9503                       {                           /* semctl     */
9504                         return -KEY_semctl;
9505                       }
9506
9507                       goto unknown;
9508
9509                     case 'g':
9510                       if (name[4] == 'e' &&
9511                           name[5] == 't')
9512                       {                           /* semget     */
9513                         return -KEY_semget;
9514                       }
9515
9516                       goto unknown;
9517
9518                     default:
9519                       goto unknown;
9520                   }
9521
9522                 default:
9523                   goto unknown;
9524               }
9525
9526             case 'h':
9527               if (name[2] == 'm')
9528               {
9529                 switch (name[3])
9530                 {
9531                   case 'c':
9532                     if (name[4] == 't' &&
9533                         name[5] == 'l')
9534                     {                             /* shmctl     */
9535                       return -KEY_shmctl;
9536                     }
9537
9538                     goto unknown;
9539
9540                   case 'g':
9541                     if (name[4] == 'e' &&
9542                         name[5] == 't')
9543                     {                             /* shmget     */
9544                       return -KEY_shmget;
9545                     }
9546
9547                     goto unknown;
9548
9549                   default:
9550                     goto unknown;
9551                 }
9552               }
9553
9554               goto unknown;
9555
9556             case 'o':
9557               if (name[2] == 'c' &&
9558                   name[3] == 'k' &&
9559                   name[4] == 'e' &&
9560                   name[5] == 't')
9561               {                                   /* socket     */
9562                 return -KEY_socket;
9563               }
9564
9565               goto unknown;
9566
9567             case 'p':
9568               if (name[2] == 'l' &&
9569                   name[3] == 'i' &&
9570                   name[4] == 'c' &&
9571                   name[5] == 'e')
9572               {                                   /* splice     */
9573                 return -KEY_splice;
9574               }
9575
9576               goto unknown;
9577
9578             case 'u':
9579               if (name[2] == 'b' &&
9580                   name[3] == 's' &&
9581                   name[4] == 't' &&
9582                   name[5] == 'r')
9583               {                                   /* substr     */
9584                 return -KEY_substr;
9585               }
9586
9587               goto unknown;
9588
9589             case 'y':
9590               if (name[2] == 's' &&
9591                   name[3] == 't' &&
9592                   name[4] == 'e' &&
9593                   name[5] == 'm')
9594               {                                   /* system     */
9595                 return -KEY_system;
9596               }
9597
9598               goto unknown;
9599
9600             default:
9601               goto unknown;
9602           }
9603
9604         case 'u':
9605           if (name[1] == 'n')
9606           {
9607             switch (name[2])
9608             {
9609               case 'l':
9610                 switch (name[3])
9611                 {
9612                   case 'e':
9613                     if (name[4] == 's' &&
9614                         name[5] == 's')
9615                     {                             /* unless     */
9616                       return KEY_unless;
9617                     }
9618
9619                     goto unknown;
9620
9621                   case 'i':
9622                     if (name[4] == 'n' &&
9623                         name[5] == 'k')
9624                     {                             /* unlink     */
9625                       return -KEY_unlink;
9626                     }
9627
9628                     goto unknown;
9629
9630                   default:
9631                     goto unknown;
9632                 }
9633
9634               case 'p':
9635                 if (name[3] == 'a' &&
9636                     name[4] == 'c' &&
9637                     name[5] == 'k')
9638                 {                                 /* unpack     */
9639                   return -KEY_unpack;
9640                 }
9641
9642                 goto unknown;
9643
9644               default:
9645                 goto unknown;
9646             }
9647           }
9648
9649           goto unknown;
9650
9651         case 'v':
9652           if (name[1] == 'a' &&
9653               name[2] == 'l' &&
9654               name[3] == 'u' &&
9655               name[4] == 'e' &&
9656               name[5] == 's')
9657           {                                       /* values     */
9658             return -KEY_values;
9659           }
9660
9661           goto unknown;
9662
9663         default:
9664           goto unknown;
9665       }
9666
9667     case 7: /* 29 tokens of length 7 */
9668       switch (name[0])
9669       {
9670         case 'D':
9671           if (name[1] == 'E' &&
9672               name[2] == 'S' &&
9673               name[3] == 'T' &&
9674               name[4] == 'R' &&
9675               name[5] == 'O' &&
9676               name[6] == 'Y')
9677           {                                       /* DESTROY    */
9678             return KEY_DESTROY;
9679           }
9680
9681           goto unknown;
9682
9683         case '_':
9684           if (name[1] == '_' &&
9685               name[2] == 'E' &&
9686               name[3] == 'N' &&
9687               name[4] == 'D' &&
9688               name[5] == '_' &&
9689               name[6] == '_')
9690           {                                       /* __END__    */
9691             return KEY___END__;
9692           }
9693
9694           goto unknown;
9695
9696         case 'b':
9697           if (name[1] == 'i' &&
9698               name[2] == 'n' &&
9699               name[3] == 'm' &&
9700               name[4] == 'o' &&
9701               name[5] == 'd' &&
9702               name[6] == 'e')
9703           {                                       /* binmode    */
9704             return -KEY_binmode;
9705           }
9706
9707           goto unknown;
9708
9709         case 'c':
9710           if (name[1] == 'o' &&
9711               name[2] == 'n' &&
9712               name[3] == 'n' &&
9713               name[4] == 'e' &&
9714               name[5] == 'c' &&
9715               name[6] == 't')
9716           {                                       /* connect    */
9717             return -KEY_connect;
9718           }
9719
9720           goto unknown;
9721
9722         case 'd':
9723           switch (name[1])
9724           {
9725             case 'b':
9726               if (name[2] == 'm' &&
9727                   name[3] == 'o' &&
9728                   name[4] == 'p' &&
9729                   name[5] == 'e' &&
9730                   name[6] == 'n')
9731               {                                   /* dbmopen    */
9732                 return -KEY_dbmopen;
9733               }
9734
9735               goto unknown;
9736
9737             case 'e':
9738               if (name[2] == 'f')
9739               {
9740                 switch (name[3])
9741                 {
9742                   case 'a':
9743                     if (name[4] == 'u' &&
9744                         name[5] == 'l' &&
9745                         name[6] == 't')
9746                     {                             /* default    */
9747                       return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_default : 0);
9748                     }
9749
9750                     goto unknown;
9751
9752                   case 'i':
9753                     if (name[4] == 'n' &&
9754                         name[5] == 'e' &&
9755                         name[6] == 'd')
9756                     {                             /* defined    */
9757                       return KEY_defined;
9758                     }
9759
9760                     goto unknown;
9761
9762                   default:
9763                     goto unknown;
9764                 }
9765               }
9766
9767               goto unknown;
9768
9769             default:
9770               goto unknown;
9771           }
9772
9773         case 'f':
9774           if (name[1] == 'o' &&
9775               name[2] == 'r' &&
9776               name[3] == 'e' &&
9777               name[4] == 'a' &&
9778               name[5] == 'c' &&
9779               name[6] == 'h')
9780           {                                       /* foreach    */
9781             return KEY_foreach;
9782           }
9783
9784           goto unknown;
9785
9786         case 'g':
9787           if (name[1] == 'e' &&
9788               name[2] == 't' &&
9789               name[3] == 'p')
9790           {
9791             switch (name[4])
9792             {
9793               case 'g':
9794                 if (name[5] == 'r' &&
9795                     name[6] == 'p')
9796                 {                                 /* getpgrp    */
9797                   return -KEY_getpgrp;
9798                 }
9799
9800                 goto unknown;
9801
9802               case 'p':
9803                 if (name[5] == 'i' &&
9804                     name[6] == 'd')
9805                 {                                 /* getppid    */
9806                   return -KEY_getppid;
9807                 }
9808
9809                 goto unknown;
9810
9811               default:
9812                 goto unknown;
9813             }
9814           }
9815
9816           goto unknown;
9817
9818         case 'l':
9819           if (name[1] == 'c' &&
9820               name[2] == 'f' &&
9821               name[3] == 'i' &&
9822               name[4] == 'r' &&
9823               name[5] == 's' &&
9824               name[6] == 't')
9825           {                                       /* lcfirst    */
9826             return -KEY_lcfirst;
9827           }
9828
9829           goto unknown;
9830
9831         case 'o':
9832           if (name[1] == 'p' &&
9833               name[2] == 'e' &&
9834               name[3] == 'n' &&
9835               name[4] == 'd' &&
9836               name[5] == 'i' &&
9837               name[6] == 'r')
9838           {                                       /* opendir    */
9839             return -KEY_opendir;
9840           }
9841
9842           goto unknown;
9843
9844         case 'p':
9845           if (name[1] == 'a' &&
9846               name[2] == 'c' &&
9847               name[3] == 'k' &&
9848               name[4] == 'a' &&
9849               name[5] == 'g' &&
9850               name[6] == 'e')
9851           {                                       /* package    */
9852             return KEY_package;
9853           }
9854
9855           goto unknown;
9856
9857         case 'r':
9858           if (name[1] == 'e')
9859           {
9860             switch (name[2])
9861             {
9862               case 'a':
9863                 if (name[3] == 'd' &&
9864                     name[4] == 'd' &&
9865                     name[5] == 'i' &&
9866                     name[6] == 'r')
9867                 {                                 /* readdir    */
9868                   return -KEY_readdir;
9869                 }
9870
9871                 goto unknown;
9872
9873               case 'q':
9874                 if (name[3] == 'u' &&
9875                     name[4] == 'i' &&
9876                     name[5] == 'r' &&
9877                     name[6] == 'e')
9878                 {                                 /* require    */
9879                   return KEY_require;
9880                 }
9881
9882                 goto unknown;
9883
9884               case 'v':
9885                 if (name[3] == 'e' &&
9886                     name[4] == 'r' &&
9887                     name[5] == 's' &&
9888                     name[6] == 'e')
9889                 {                                 /* reverse    */
9890                   return -KEY_reverse;
9891                 }
9892
9893                 goto unknown;
9894
9895               default:
9896                 goto unknown;
9897             }
9898           }
9899
9900           goto unknown;
9901
9902         case 's':
9903           switch (name[1])
9904           {
9905             case 'e':
9906               switch (name[2])
9907               {
9908                 case 'e':
9909                   if (name[3] == 'k' &&
9910                       name[4] == 'd' &&
9911                       name[5] == 'i' &&
9912                       name[6] == 'r')
9913                   {                               /* seekdir    */
9914                     return -KEY_seekdir;
9915                   }
9916
9917                   goto unknown;
9918
9919                 case 't':
9920                   if (name[3] == 'p' &&
9921                       name[4] == 'g' &&
9922                       name[5] == 'r' &&
9923                       name[6] == 'p')
9924                   {                               /* setpgrp    */
9925                     return -KEY_setpgrp;
9926                   }
9927
9928                   goto unknown;
9929
9930                 default:
9931                   goto unknown;
9932               }
9933
9934             case 'h':
9935               if (name[2] == 'm' &&
9936                   name[3] == 'r' &&
9937                   name[4] == 'e' &&
9938                   name[5] == 'a' &&
9939                   name[6] == 'd')
9940               {                                   /* shmread    */
9941                 return -KEY_shmread;
9942               }
9943
9944               goto unknown;
9945
9946             case 'p':
9947               if (name[2] == 'r' &&
9948                   name[3] == 'i' &&
9949                   name[4] == 'n' &&
9950                   name[5] == 't' &&
9951                   name[6] == 'f')
9952               {                                   /* sprintf    */
9953                 return -KEY_sprintf;
9954               }
9955
9956               goto unknown;
9957
9958             case 'y':
9959               switch (name[2])
9960               {
9961                 case 'm':
9962                   if (name[3] == 'l' &&
9963                       name[4] == 'i' &&
9964                       name[5] == 'n' &&
9965                       name[6] == 'k')
9966                   {                               /* symlink    */
9967                     return -KEY_symlink;
9968                   }
9969
9970                   goto unknown;
9971
9972                 case 's':
9973                   switch (name[3])
9974                   {
9975                     case 'c':
9976                       if (name[4] == 'a' &&
9977                           name[5] == 'l' &&
9978                           name[6] == 'l')
9979                       {                           /* syscall    */
9980                         return -KEY_syscall;
9981                       }
9982
9983                       goto unknown;
9984
9985                     case 'o':
9986                       if (name[4] == 'p' &&
9987                           name[5] == 'e' &&
9988                           name[6] == 'n')
9989                       {                           /* sysopen    */
9990                         return -KEY_sysopen;
9991                       }
9992
9993                       goto unknown;
9994
9995                     case 'r':
9996                       if (name[4] == 'e' &&
9997                           name[5] == 'a' &&
9998                           name[6] == 'd')
9999                       {                           /* sysread    */
10000                         return -KEY_sysread;
10001                       }
10002
10003                       goto unknown;
10004
10005                     case 's':
10006                       if (name[4] == 'e' &&
10007                           name[5] == 'e' &&
10008                           name[6] == 'k')
10009                       {                           /* sysseek    */
10010                         return -KEY_sysseek;
10011                       }
10012
10013                       goto unknown;
10014
10015                     default:
10016                       goto unknown;
10017                   }
10018
10019                 default:
10020                   goto unknown;
10021               }
10022
10023             default:
10024               goto unknown;
10025           }
10026
10027         case 't':
10028           if (name[1] == 'e' &&
10029               name[2] == 'l' &&
10030               name[3] == 'l' &&
10031               name[4] == 'd' &&
10032               name[5] == 'i' &&
10033               name[6] == 'r')
10034           {                                       /* telldir    */
10035             return -KEY_telldir;
10036           }
10037
10038           goto unknown;
10039
10040         case 'u':
10041           switch (name[1])
10042           {
10043             case 'c':
10044               if (name[2] == 'f' &&
10045                   name[3] == 'i' &&
10046                   name[4] == 'r' &&
10047                   name[5] == 's' &&
10048                   name[6] == 't')
10049               {                                   /* ucfirst    */
10050                 return -KEY_ucfirst;
10051               }
10052
10053               goto unknown;
10054
10055             case 'n':
10056               if (name[2] == 's' &&
10057                   name[3] == 'h' &&
10058                   name[4] == 'i' &&
10059                   name[5] == 'f' &&
10060                   name[6] == 't')
10061               {                                   /* unshift    */
10062                 return -KEY_unshift;
10063               }
10064
10065               goto unknown;
10066
10067             default:
10068               goto unknown;
10069           }
10070
10071         case 'w':
10072           if (name[1] == 'a' &&
10073               name[2] == 'i' &&
10074               name[3] == 't' &&
10075               name[4] == 'p' &&
10076               name[5] == 'i' &&
10077               name[6] == 'd')
10078           {                                       /* waitpid    */
10079             return -KEY_waitpid;
10080           }
10081
10082           goto unknown;
10083
10084         default:
10085           goto unknown;
10086       }
10087
10088     case 8: /* 26 tokens of length 8 */
10089       switch (name[0])
10090       {
10091         case 'A':
10092           if (name[1] == 'U' &&
10093               name[2] == 'T' &&
10094               name[3] == 'O' &&
10095               name[4] == 'L' &&
10096               name[5] == 'O' &&
10097               name[6] == 'A' &&
10098               name[7] == 'D')
10099           {                                       /* AUTOLOAD   */
10100             return KEY_AUTOLOAD;
10101           }
10102
10103           goto unknown;
10104
10105         case '_':
10106           if (name[1] == '_')
10107           {
10108             switch (name[2])
10109             {
10110               case 'D':
10111                 if (name[3] == 'A' &&
10112                     name[4] == 'T' &&
10113                     name[5] == 'A' &&
10114                     name[6] == '_' &&
10115                     name[7] == '_')
10116                 {                                 /* __DATA__   */
10117                   return KEY___DATA__;
10118                 }
10119
10120                 goto unknown;
10121
10122               case 'F':
10123                 if (name[3] == 'I' &&
10124                     name[4] == 'L' &&
10125                     name[5] == 'E' &&
10126                     name[6] == '_' &&
10127                     name[7] == '_')
10128                 {                                 /* __FILE__   */
10129                   return -KEY___FILE__;
10130                 }
10131
10132                 goto unknown;
10133
10134               case 'L':
10135                 if (name[3] == 'I' &&
10136                     name[4] == 'N' &&
10137                     name[5] == 'E' &&
10138                     name[6] == '_' &&
10139                     name[7] == '_')
10140                 {                                 /* __LINE__   */
10141                   return -KEY___LINE__;
10142                 }
10143
10144                 goto unknown;
10145
10146               default:
10147                 goto unknown;
10148             }
10149           }
10150
10151           goto unknown;
10152
10153         case 'c':
10154           switch (name[1])
10155           {
10156             case 'l':
10157               if (name[2] == 'o' &&
10158                   name[3] == 's' &&
10159                   name[4] == 'e' &&
10160                   name[5] == 'd' &&
10161                   name[6] == 'i' &&
10162                   name[7] == 'r')
10163               {                                   /* closedir   */
10164                 return -KEY_closedir;
10165               }
10166
10167               goto unknown;
10168
10169             case 'o':
10170               if (name[2] == 'n' &&
10171                   name[3] == 't' &&
10172                   name[4] == 'i' &&
10173                   name[5] == 'n' &&
10174                   name[6] == 'u' &&
10175                   name[7] == 'e')
10176               {                                   /* continue   */
10177                 return -KEY_continue;
10178               }
10179
10180               goto unknown;
10181
10182             default:
10183               goto unknown;
10184           }
10185
10186         case 'd':
10187           if (name[1] == 'b' &&
10188               name[2] == 'm' &&
10189               name[3] == 'c' &&
10190               name[4] == 'l' &&
10191               name[5] == 'o' &&
10192               name[6] == 's' &&
10193               name[7] == 'e')
10194           {                                       /* dbmclose   */
10195             return -KEY_dbmclose;
10196           }
10197
10198           goto unknown;
10199
10200         case 'e':
10201           if (name[1] == 'n' &&
10202               name[2] == 'd')
10203           {
10204             switch (name[3])
10205             {
10206               case 'g':
10207                 if (name[4] == 'r' &&
10208                     name[5] == 'e' &&
10209                     name[6] == 'n' &&
10210                     name[7] == 't')
10211                 {                                 /* endgrent   */
10212                   return -KEY_endgrent;
10213                 }
10214
10215                 goto unknown;
10216
10217               case 'p':
10218                 if (name[4] == 'w' &&
10219                     name[5] == 'e' &&
10220                     name[6] == 'n' &&
10221                     name[7] == 't')
10222                 {                                 /* endpwent   */
10223                   return -KEY_endpwent;
10224                 }
10225
10226                 goto unknown;
10227
10228               default:
10229                 goto unknown;
10230             }
10231           }
10232
10233           goto unknown;
10234
10235         case 'f':
10236           if (name[1] == 'o' &&
10237               name[2] == 'r' &&
10238               name[3] == 'm' &&
10239               name[4] == 'l' &&
10240               name[5] == 'i' &&
10241               name[6] == 'n' &&
10242               name[7] == 'e')
10243           {                                       /* formline   */
10244             return -KEY_formline;
10245           }
10246
10247           goto unknown;
10248
10249         case 'g':
10250           if (name[1] == 'e' &&
10251               name[2] == 't')
10252           {
10253             switch (name[3])
10254             {
10255               case 'g':
10256                 if (name[4] == 'r')
10257                 {
10258                   switch (name[5])
10259                   {
10260                     case 'e':
10261                       if (name[6] == 'n' &&
10262                           name[7] == 't')
10263                       {                           /* getgrent   */
10264                         return -KEY_getgrent;
10265                       }
10266
10267                       goto unknown;
10268
10269                     case 'g':
10270                       if (name[6] == 'i' &&
10271                           name[7] == 'd')
10272                       {                           /* getgrgid   */
10273                         return -KEY_getgrgid;
10274                       }
10275
10276                       goto unknown;
10277
10278                     case 'n':
10279                       if (name[6] == 'a' &&
10280                           name[7] == 'm')
10281                       {                           /* getgrnam   */
10282                         return -KEY_getgrnam;
10283                       }
10284
10285                       goto unknown;
10286
10287                     default:
10288                       goto unknown;
10289                   }
10290                 }
10291
10292                 goto unknown;
10293
10294               case 'l':
10295                 if (name[4] == 'o' &&
10296                     name[5] == 'g' &&
10297                     name[6] == 'i' &&
10298                     name[7] == 'n')
10299                 {                                 /* getlogin   */
10300                   return -KEY_getlogin;
10301                 }
10302
10303                 goto unknown;
10304
10305               case 'p':
10306                 if (name[4] == 'w')
10307                 {
10308                   switch (name[5])
10309                   {
10310                     case 'e':
10311                       if (name[6] == 'n' &&
10312                           name[7] == 't')
10313                       {                           /* getpwent   */
10314                         return -KEY_getpwent;
10315                       }
10316
10317                       goto unknown;
10318
10319                     case 'n':
10320                       if (name[6] == 'a' &&
10321                           name[7] == 'm')
10322                       {                           /* getpwnam   */
10323                         return -KEY_getpwnam;
10324                       }
10325
10326                       goto unknown;
10327
10328                     case 'u':
10329                       if (name[6] == 'i' &&
10330                           name[7] == 'd')
10331                       {                           /* getpwuid   */
10332                         return -KEY_getpwuid;
10333                       }
10334
10335                       goto unknown;
10336
10337                     default:
10338                       goto unknown;
10339                   }
10340                 }
10341
10342                 goto unknown;
10343
10344               default:
10345                 goto unknown;
10346             }
10347           }
10348
10349           goto unknown;
10350
10351         case 'r':
10352           if (name[1] == 'e' &&
10353               name[2] == 'a' &&
10354               name[3] == 'd')
10355           {
10356             switch (name[4])
10357             {
10358               case 'l':
10359                 if (name[5] == 'i' &&
10360                     name[6] == 'n')
10361                 {
10362                   switch (name[7])
10363                   {
10364                     case 'e':
10365                       {                           /* readline   */
10366                         return -KEY_readline;
10367                       }
10368
10369                     case 'k':
10370                       {                           /* readlink   */
10371                         return -KEY_readlink;
10372                       }
10373
10374                     default:
10375                       goto unknown;
10376                   }
10377                 }
10378
10379                 goto unknown;
10380
10381               case 'p':
10382                 if (name[5] == 'i' &&
10383                     name[6] == 'p' &&
10384                     name[7] == 'e')
10385                 {                                 /* readpipe   */
10386                   return -KEY_readpipe;
10387                 }
10388
10389                 goto unknown;
10390
10391               default:
10392                 goto unknown;
10393             }
10394           }
10395
10396           goto unknown;
10397
10398         case 's':
10399           switch (name[1])
10400           {
10401             case 'e':
10402               if (name[2] == 't')
10403               {
10404                 switch (name[3])
10405                 {
10406                   case 'g':
10407                     if (name[4] == 'r' &&
10408                         name[5] == 'e' &&
10409                         name[6] == 'n' &&
10410                         name[7] == 't')
10411                     {                             /* setgrent   */
10412                       return -KEY_setgrent;
10413                     }
10414
10415                     goto unknown;
10416
10417                   case 'p':
10418                     if (name[4] == 'w' &&
10419                         name[5] == 'e' &&
10420                         name[6] == 'n' &&
10421                         name[7] == 't')
10422                     {                             /* setpwent   */
10423                       return -KEY_setpwent;
10424                     }
10425
10426                     goto unknown;
10427
10428                   default:
10429                     goto unknown;
10430                 }
10431               }
10432
10433               goto unknown;
10434
10435             case 'h':
10436               switch (name[2])
10437               {
10438                 case 'm':
10439                   if (name[3] == 'w' &&
10440                       name[4] == 'r' &&
10441                       name[5] == 'i' &&
10442                       name[6] == 't' &&
10443                       name[7] == 'e')
10444                   {                               /* shmwrite   */
10445                     return -KEY_shmwrite;
10446                   }
10447
10448                   goto unknown;
10449
10450                 case 'u':
10451                   if (name[3] == 't' &&
10452                       name[4] == 'd' &&
10453                       name[5] == 'o' &&
10454                       name[6] == 'w' &&
10455                       name[7] == 'n')
10456                   {                               /* shutdown   */
10457                     return -KEY_shutdown;
10458                   }
10459
10460                   goto unknown;
10461
10462                 default:
10463                   goto unknown;
10464               }
10465
10466             case 'y':
10467               if (name[2] == 's' &&
10468                   name[3] == 'w' &&
10469                   name[4] == 'r' &&
10470                   name[5] == 'i' &&
10471                   name[6] == 't' &&
10472                   name[7] == 'e')
10473               {                                   /* syswrite   */
10474                 return -KEY_syswrite;
10475               }
10476
10477               goto unknown;
10478
10479             default:
10480               goto unknown;
10481           }
10482
10483         case 't':
10484           if (name[1] == 'r' &&
10485               name[2] == 'u' &&
10486               name[3] == 'n' &&
10487               name[4] == 'c' &&
10488               name[5] == 'a' &&
10489               name[6] == 't' &&
10490               name[7] == 'e')
10491           {                                       /* truncate   */
10492             return -KEY_truncate;
10493           }
10494
10495           goto unknown;
10496
10497         default:
10498           goto unknown;
10499       }
10500
10501     case 9: /* 9 tokens of length 9 */
10502       switch (name[0])
10503       {
10504         case 'U':
10505           if (name[1] == 'N' &&
10506               name[2] == 'I' &&
10507               name[3] == 'T' &&
10508               name[4] == 'C' &&
10509               name[5] == 'H' &&
10510               name[6] == 'E' &&
10511               name[7] == 'C' &&
10512               name[8] == 'K')
10513           {                                       /* UNITCHECK  */
10514             return KEY_UNITCHECK;
10515           }
10516
10517           goto unknown;
10518
10519         case 'e':
10520           if (name[1] == 'n' &&
10521               name[2] == 'd' &&
10522               name[3] == 'n' &&
10523               name[4] == 'e' &&
10524               name[5] == 't' &&
10525               name[6] == 'e' &&
10526               name[7] == 'n' &&
10527               name[8] == 't')
10528           {                                       /* endnetent  */
10529             return -KEY_endnetent;
10530           }
10531
10532           goto unknown;
10533
10534         case 'g':
10535           if (name[1] == 'e' &&
10536               name[2] == 't' &&
10537               name[3] == 'n' &&
10538               name[4] == 'e' &&
10539               name[5] == 't' &&
10540               name[6] == 'e' &&
10541               name[7] == 'n' &&
10542               name[8] == 't')
10543           {                                       /* getnetent  */
10544             return -KEY_getnetent;
10545           }
10546
10547           goto unknown;
10548
10549         case 'l':
10550           if (name[1] == 'o' &&
10551               name[2] == 'c' &&
10552               name[3] == 'a' &&
10553               name[4] == 'l' &&
10554               name[5] == 't' &&
10555               name[6] == 'i' &&
10556               name[7] == 'm' &&
10557               name[8] == 'e')
10558           {                                       /* localtime  */
10559             return -KEY_localtime;
10560           }
10561
10562           goto unknown;
10563
10564         case 'p':
10565           if (name[1] == 'r' &&
10566               name[2] == 'o' &&
10567               name[3] == 't' &&
10568               name[4] == 'o' &&
10569               name[5] == 't' &&
10570               name[6] == 'y' &&
10571               name[7] == 'p' &&
10572               name[8] == 'e')
10573           {                                       /* prototype  */
10574             return KEY_prototype;
10575           }
10576
10577           goto unknown;
10578
10579         case 'q':
10580           if (name[1] == 'u' &&
10581               name[2] == 'o' &&
10582               name[3] == 't' &&
10583               name[4] == 'e' &&
10584               name[5] == 'm' &&
10585               name[6] == 'e' &&
10586               name[7] == 't' &&
10587               name[8] == 'a')
10588           {                                       /* quotemeta  */
10589             return -KEY_quotemeta;
10590           }
10591
10592           goto unknown;
10593
10594         case 'r':
10595           if (name[1] == 'e' &&
10596               name[2] == 'w' &&
10597               name[3] == 'i' &&
10598               name[4] == 'n' &&
10599               name[5] == 'd' &&
10600               name[6] == 'd' &&
10601               name[7] == 'i' &&
10602               name[8] == 'r')
10603           {                                       /* rewinddir  */
10604             return -KEY_rewinddir;
10605           }
10606
10607           goto unknown;
10608
10609         case 's':
10610           if (name[1] == 'e' &&
10611               name[2] == 't' &&
10612               name[3] == 'n' &&
10613               name[4] == 'e' &&
10614               name[5] == 't' &&
10615               name[6] == 'e' &&
10616               name[7] == 'n' &&
10617               name[8] == 't')
10618           {                                       /* setnetent  */
10619             return -KEY_setnetent;
10620           }
10621
10622           goto unknown;
10623
10624         case 'w':
10625           if (name[1] == 'a' &&
10626               name[2] == 'n' &&
10627               name[3] == 't' &&
10628               name[4] == 'a' &&
10629               name[5] == 'r' &&
10630               name[6] == 'r' &&
10631               name[7] == 'a' &&
10632               name[8] == 'y')
10633           {                                       /* wantarray  */
10634             return -KEY_wantarray;
10635           }
10636
10637           goto unknown;
10638
10639         default:
10640           goto unknown;
10641       }
10642
10643     case 10: /* 9 tokens of length 10 */
10644       switch (name[0])
10645       {
10646         case 'e':
10647           if (name[1] == 'n' &&
10648               name[2] == 'd')
10649           {
10650             switch (name[3])
10651             {
10652               case 'h':
10653                 if (name[4] == 'o' &&
10654                     name[5] == 's' &&
10655                     name[6] == 't' &&
10656                     name[7] == 'e' &&
10657                     name[8] == 'n' &&
10658                     name[9] == 't')
10659                 {                                 /* endhostent */
10660                   return -KEY_endhostent;
10661                 }
10662
10663                 goto unknown;
10664
10665               case 's':
10666                 if (name[4] == 'e' &&
10667                     name[5] == 'r' &&
10668                     name[6] == 'v' &&
10669                     name[7] == 'e' &&
10670                     name[8] == 'n' &&
10671                     name[9] == 't')
10672                 {                                 /* endservent */
10673                   return -KEY_endservent;
10674                 }
10675
10676                 goto unknown;
10677
10678               default:
10679                 goto unknown;
10680             }
10681           }
10682
10683           goto unknown;
10684
10685         case 'g':
10686           if (name[1] == 'e' &&
10687               name[2] == 't')
10688           {
10689             switch (name[3])
10690             {
10691               case 'h':
10692                 if (name[4] == 'o' &&
10693                     name[5] == 's' &&
10694                     name[6] == 't' &&
10695                     name[7] == 'e' &&
10696                     name[8] == 'n' &&
10697                     name[9] == 't')
10698                 {                                 /* gethostent */
10699                   return -KEY_gethostent;
10700                 }
10701
10702                 goto unknown;
10703
10704               case 's':
10705                 switch (name[4])
10706                 {
10707                   case 'e':
10708                     if (name[5] == 'r' &&
10709                         name[6] == 'v' &&
10710                         name[7] == 'e' &&
10711                         name[8] == 'n' &&
10712                         name[9] == 't')
10713                     {                             /* getservent */
10714                       return -KEY_getservent;
10715                     }
10716
10717                     goto unknown;
10718
10719                   case 'o':
10720                     if (name[5] == 'c' &&
10721                         name[6] == 'k' &&
10722                         name[7] == 'o' &&
10723                         name[8] == 'p' &&
10724                         name[9] == 't')
10725                     {                             /* getsockopt */
10726                       return -KEY_getsockopt;
10727                     }
10728
10729                     goto unknown;
10730
10731                   default:
10732                     goto unknown;
10733                 }
10734
10735               default:
10736                 goto unknown;
10737             }
10738           }
10739
10740           goto unknown;
10741
10742         case 's':
10743           switch (name[1])
10744           {
10745             case 'e':
10746               if (name[2] == 't')
10747               {
10748                 switch (name[3])
10749                 {
10750                   case 'h':
10751                     if (name[4] == 'o' &&
10752                         name[5] == 's' &&
10753                         name[6] == 't' &&
10754                         name[7] == 'e' &&
10755                         name[8] == 'n' &&
10756                         name[9] == 't')
10757                     {                             /* sethostent */
10758                       return -KEY_sethostent;
10759                     }
10760
10761                     goto unknown;
10762
10763                   case 's':
10764                     switch (name[4])
10765                     {
10766                       case 'e':
10767                         if (name[5] == 'r' &&
10768                             name[6] == 'v' &&
10769                             name[7] == 'e' &&
10770                             name[8] == 'n' &&
10771                             name[9] == 't')
10772                         {                         /* setservent */
10773                           return -KEY_setservent;
10774                         }
10775
10776                         goto unknown;
10777
10778                       case 'o':
10779                         if (name[5] == 'c' &&
10780                             name[6] == 'k' &&
10781                             name[7] == 'o' &&
10782                             name[8] == 'p' &&
10783                             name[9] == 't')
10784                         {                         /* setsockopt */
10785                           return -KEY_setsockopt;
10786                         }
10787
10788                         goto unknown;
10789
10790                       default:
10791                         goto unknown;
10792                     }
10793
10794                   default:
10795                     goto unknown;
10796                 }
10797               }
10798
10799               goto unknown;
10800
10801             case 'o':
10802               if (name[2] == 'c' &&
10803                   name[3] == 'k' &&
10804                   name[4] == 'e' &&
10805                   name[5] == 't' &&
10806                   name[6] == 'p' &&
10807                   name[7] == 'a' &&
10808                   name[8] == 'i' &&
10809                   name[9] == 'r')
10810               {                                   /* socketpair */
10811                 return -KEY_socketpair;
10812               }
10813
10814               goto unknown;
10815
10816             default:
10817               goto unknown;
10818           }
10819
10820         default:
10821           goto unknown;
10822       }
10823
10824     case 11: /* 8 tokens of length 11 */
10825       switch (name[0])
10826       {
10827         case '_':
10828           if (name[1] == '_' &&
10829               name[2] == 'P' &&
10830               name[3] == 'A' &&
10831               name[4] == 'C' &&
10832               name[5] == 'K' &&
10833               name[6] == 'A' &&
10834               name[7] == 'G' &&
10835               name[8] == 'E' &&
10836               name[9] == '_' &&
10837               name[10] == '_')
10838           {                                       /* __PACKAGE__ */
10839             return -KEY___PACKAGE__;
10840           }
10841
10842           goto unknown;
10843
10844         case 'e':
10845           if (name[1] == 'n' &&
10846               name[2] == 'd' &&
10847               name[3] == 'p' &&
10848               name[4] == 'r' &&
10849               name[5] == 'o' &&
10850               name[6] == 't' &&
10851               name[7] == 'o' &&
10852               name[8] == 'e' &&
10853               name[9] == 'n' &&
10854               name[10] == 't')
10855           {                                       /* endprotoent */
10856             return -KEY_endprotoent;
10857           }
10858
10859           goto unknown;
10860
10861         case 'g':
10862           if (name[1] == 'e' &&
10863               name[2] == 't')
10864           {
10865             switch (name[3])
10866             {
10867               case 'p':
10868                 switch (name[4])
10869                 {
10870                   case 'e':
10871                     if (name[5] == 'e' &&
10872                         name[6] == 'r' &&
10873                         name[7] == 'n' &&
10874                         name[8] == 'a' &&
10875                         name[9] == 'm' &&
10876                         name[10] == 'e')
10877                     {                             /* getpeername */
10878                       return -KEY_getpeername;
10879                     }
10880
10881                     goto unknown;
10882
10883                   case 'r':
10884                     switch (name[5])
10885                     {
10886                       case 'i':
10887                         if (name[6] == 'o' &&
10888                             name[7] == 'r' &&
10889                             name[8] == 'i' &&
10890                             name[9] == 't' &&
10891                             name[10] == 'y')
10892                         {                         /* getpriority */
10893                           return -KEY_getpriority;
10894                         }
10895
10896                         goto unknown;
10897
10898                       case 'o':
10899                         if (name[6] == 't' &&
10900                             name[7] == 'o' &&
10901                             name[8] == 'e' &&
10902                             name[9] == 'n' &&
10903                             name[10] == 't')
10904                         {                         /* getprotoent */
10905                           return -KEY_getprotoent;
10906                         }
10907
10908                         goto unknown;
10909
10910                       default:
10911                         goto unknown;
10912                     }
10913
10914                   default:
10915                     goto unknown;
10916                 }
10917
10918               case 's':
10919                 if (name[4] == 'o' &&
10920                     name[5] == 'c' &&
10921                     name[6] == 'k' &&
10922                     name[7] == 'n' &&
10923                     name[8] == 'a' &&
10924                     name[9] == 'm' &&
10925                     name[10] == 'e')
10926                 {                                 /* getsockname */
10927                   return -KEY_getsockname;
10928                 }
10929
10930                 goto unknown;
10931
10932               default:
10933                 goto unknown;
10934             }
10935           }
10936
10937           goto unknown;
10938
10939         case 's':
10940           if (name[1] == 'e' &&
10941               name[2] == 't' &&
10942               name[3] == 'p' &&
10943               name[4] == 'r')
10944           {
10945             switch (name[5])
10946             {
10947               case 'i':
10948                 if (name[6] == 'o' &&
10949                     name[7] == 'r' &&
10950                     name[8] == 'i' &&
10951                     name[9] == 't' &&
10952                     name[10] == 'y')
10953                 {                                 /* setpriority */
10954                   return -KEY_setpriority;
10955                 }
10956
10957                 goto unknown;
10958
10959               case 'o':
10960                 if (name[6] == 't' &&
10961                     name[7] == 'o' &&
10962                     name[8] == 'e' &&
10963                     name[9] == 'n' &&
10964                     name[10] == 't')
10965                 {                                 /* setprotoent */
10966                   return -KEY_setprotoent;
10967                 }
10968
10969                 goto unknown;
10970
10971               default:
10972                 goto unknown;
10973             }
10974           }
10975
10976           goto unknown;
10977
10978         default:
10979           goto unknown;
10980       }
10981
10982     case 12: /* 2 tokens of length 12 */
10983       if (name[0] == 'g' &&
10984           name[1] == 'e' &&
10985           name[2] == 't' &&
10986           name[3] == 'n' &&
10987           name[4] == 'e' &&
10988           name[5] == 't' &&
10989           name[6] == 'b' &&
10990           name[7] == 'y')
10991       {
10992         switch (name[8])
10993         {
10994           case 'a':
10995             if (name[9] == 'd' &&
10996                 name[10] == 'd' &&
10997                 name[11] == 'r')
10998             {                                     /* getnetbyaddr */
10999               return -KEY_getnetbyaddr;
11000             }
11001
11002             goto unknown;
11003
11004           case 'n':
11005             if (name[9] == 'a' &&
11006                 name[10] == 'm' &&
11007                 name[11] == 'e')
11008             {                                     /* getnetbyname */
11009               return -KEY_getnetbyname;
11010             }
11011
11012             goto unknown;
11013
11014           default:
11015             goto unknown;
11016         }
11017       }
11018
11019       goto unknown;
11020
11021     case 13: /* 4 tokens of length 13 */
11022       if (name[0] == 'g' &&
11023           name[1] == 'e' &&
11024           name[2] == 't')
11025       {
11026         switch (name[3])
11027         {
11028           case 'h':
11029             if (name[4] == 'o' &&
11030                 name[5] == 's' &&
11031                 name[6] == 't' &&
11032                 name[7] == 'b' &&
11033                 name[8] == 'y')
11034             {
11035               switch (name[9])
11036               {
11037                 case 'a':
11038                   if (name[10] == 'd' &&
11039                       name[11] == 'd' &&
11040                       name[12] == 'r')
11041                   {                               /* gethostbyaddr */
11042                     return -KEY_gethostbyaddr;
11043                   }
11044
11045                   goto unknown;
11046
11047                 case 'n':
11048                   if (name[10] == 'a' &&
11049                       name[11] == 'm' &&
11050                       name[12] == 'e')
11051                   {                               /* gethostbyname */
11052                     return -KEY_gethostbyname;
11053                   }
11054
11055                   goto unknown;
11056
11057                 default:
11058                   goto unknown;
11059               }
11060             }
11061
11062             goto unknown;
11063
11064           case 's':
11065             if (name[4] == 'e' &&
11066                 name[5] == 'r' &&
11067                 name[6] == 'v' &&
11068                 name[7] == 'b' &&
11069                 name[8] == 'y')
11070             {
11071               switch (name[9])
11072               {
11073                 case 'n':
11074                   if (name[10] == 'a' &&
11075                       name[11] == 'm' &&
11076                       name[12] == 'e')
11077                   {                               /* getservbyname */
11078                     return -KEY_getservbyname;
11079                   }
11080
11081                   goto unknown;
11082
11083                 case 'p':
11084                   if (name[10] == 'o' &&
11085                       name[11] == 'r' &&
11086                       name[12] == 't')
11087                   {                               /* getservbyport */
11088                     return -KEY_getservbyport;
11089                   }
11090
11091                   goto unknown;
11092
11093                 default:
11094                   goto unknown;
11095               }
11096             }
11097
11098             goto unknown;
11099
11100           default:
11101             goto unknown;
11102         }
11103       }
11104
11105       goto unknown;
11106
11107     case 14: /* 1 tokens of length 14 */
11108       if (name[0] == 'g' &&
11109           name[1] == 'e' &&
11110           name[2] == 't' &&
11111           name[3] == 'p' &&
11112           name[4] == 'r' &&
11113           name[5] == 'o' &&
11114           name[6] == 't' &&
11115           name[7] == 'o' &&
11116           name[8] == 'b' &&
11117           name[9] == 'y' &&
11118           name[10] == 'n' &&
11119           name[11] == 'a' &&
11120           name[12] == 'm' &&
11121           name[13] == 'e')
11122       {                                           /* getprotobyname */
11123         return -KEY_getprotobyname;
11124       }
11125
11126       goto unknown;
11127
11128     case 16: /* 1 tokens of length 16 */
11129       if (name[0] == 'g' &&
11130           name[1] == 'e' &&
11131           name[2] == 't' &&
11132           name[3] == 'p' &&
11133           name[4] == 'r' &&
11134           name[5] == 'o' &&
11135           name[6] == 't' &&
11136           name[7] == 'o' &&
11137           name[8] == 'b' &&
11138           name[9] == 'y' &&
11139           name[10] == 'n' &&
11140           name[11] == 'u' &&
11141           name[12] == 'm' &&
11142           name[13] == 'b' &&
11143           name[14] == 'e' &&
11144           name[15] == 'r')
11145       {                                           /* getprotobynumber */
11146         return -KEY_getprotobynumber;
11147       }
11148
11149       goto unknown;
11150
11151     default:
11152       goto unknown;
11153   }
11154
11155 unknown:
11156   return 0;
11157 }
11158
11159 STATIC void
11160 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
11161 {
11162     dVAR;
11163
11164     PERL_ARGS_ASSERT_CHECKCOMMA;
11165
11166     if (*s == ' ' && s[1] == '(') {     /* XXX gotta be a better way */
11167         if (ckWARN(WARN_SYNTAX)) {
11168             int level = 1;
11169             const char *w;
11170             for (w = s+2; *w && level; w++) {
11171                 if (*w == '(')
11172                     ++level;
11173                 else if (*w == ')')
11174                     --level;
11175             }
11176             while (isSPACE(*w))
11177                 ++w;
11178             /* the list of chars below is for end of statements or
11179              * block / parens, boolean operators (&&, ||, //) and branch
11180              * constructs (or, and, if, until, unless, while, err, for).
11181              * Not a very solid hack... */
11182             if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
11183                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
11184                             "%s (...) interpreted as function",name);
11185         }
11186     }
11187     while (s < PL_bufend && isSPACE(*s))
11188         s++;
11189     if (*s == '(')
11190         s++;
11191     while (s < PL_bufend && isSPACE(*s))
11192         s++;
11193     if (isIDFIRST_lazy_if(s,UTF)) {
11194         const char * const w = s++;
11195         while (isALNUM_lazy_if(s,UTF))
11196             s++;
11197         while (s < PL_bufend && isSPACE(*s))
11198             s++;
11199         if (*s == ',') {
11200             GV* gv;
11201             if (keyword(w, s - w, 0))
11202                 return;
11203
11204             gv = gv_fetchpvn_flags(w, s - w, 0, SVt_PVCV);
11205             if (gv && GvCVu(gv))
11206                 return;
11207             Perl_croak(aTHX_ "No comma allowed after %s", what);
11208         }
11209     }
11210 }
11211
11212 /* Either returns sv, or mortalizes sv and returns a new SV*.
11213    Best used as sv=new_constant(..., sv, ...).
11214    If s, pv are NULL, calls subroutine with one argument,
11215    and type is used with error messages only. */
11216
11217 STATIC SV *
11218 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
11219                SV *sv, SV *pv, const char *type, STRLEN typelen)
11220 {
11221     dVAR; dSP;
11222     HV * const table = GvHV(PL_hintgv);          /* ^H */
11223     SV *res;
11224     SV **cvp;
11225     SV *cv, *typesv;
11226     const char *why1 = "", *why2 = "", *why3 = "";
11227
11228     PERL_ARGS_ASSERT_NEW_CONSTANT;
11229
11230     if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
11231         SV *msg;
11232
11233         why2 = (const char *)
11234             (strEQ(key,"charnames")
11235              ? "(possibly a missing \"use charnames ...\")"
11236              : "");
11237         msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
11238                             (type ? type: "undef"), why2);
11239
11240         /* This is convoluted and evil ("goto considered harmful")
11241          * but I do not understand the intricacies of all the different
11242          * failure modes of %^H in here.  The goal here is to make
11243          * the most probable error message user-friendly. --jhi */
11244
11245         goto msgdone;
11246
11247     report:
11248         msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
11249                             (type ? type: "undef"), why1, why2, why3);
11250     msgdone:
11251         yyerror(SvPVX_const(msg));
11252         SvREFCNT_dec(msg);
11253         return sv;
11254     }
11255     cvp = hv_fetch(table, key, keylen, FALSE);
11256     if (!cvp || !SvOK(*cvp)) {
11257         why1 = "$^H{";
11258         why2 = key;
11259         why3 = "} is not defined";
11260         goto report;
11261     }
11262     sv_2mortal(sv);                     /* Parent created it permanently */
11263     cv = *cvp;
11264     if (!pv && s)
11265         pv = newSVpvn_flags(s, len, SVs_TEMP);
11266     if (type && pv)
11267         typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
11268     else
11269         typesv = &PL_sv_undef;
11270
11271     PUSHSTACKi(PERLSI_OVERLOAD);
11272     ENTER ;
11273     SAVETMPS;
11274
11275     PUSHMARK(SP) ;
11276     EXTEND(sp, 3);
11277     if (pv)
11278         PUSHs(pv);
11279     PUSHs(sv);
11280     if (pv)
11281         PUSHs(typesv);
11282     PUTBACK;
11283     call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
11284
11285     SPAGAIN ;
11286
11287     /* Check the eval first */
11288     if (!PL_in_eval && SvTRUE(ERRSV)) {
11289         sv_catpvs(ERRSV, "Propagated");
11290         yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
11291         (void)POPs;
11292         res = SvREFCNT_inc_simple(sv);
11293     }
11294     else {
11295         res = POPs;
11296         SvREFCNT_inc_simple_void(res);
11297     }
11298
11299     PUTBACK ;
11300     FREETMPS ;
11301     LEAVE ;
11302     POPSTACK;
11303
11304     if (!SvOK(res)) {
11305         why1 = "Call to &{$^H{";
11306         why2 = key;
11307         why3 = "}} did not return a defined value";
11308         sv = res;
11309         goto report;
11310     }
11311
11312     return res;
11313 }
11314
11315 /* Returns a NUL terminated string, with the length of the string written to
11316    *slp
11317    */
11318 STATIC char *
11319 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
11320 {
11321     dVAR;
11322     register char *d = dest;
11323     register char * const e = d + destlen - 3;  /* two-character token, ending NUL */
11324
11325     PERL_ARGS_ASSERT_SCAN_WORD;
11326
11327     for (;;) {
11328         if (d >= e)
11329             Perl_croak(aTHX_ ident_too_long);
11330         if (isALNUM(*s))        /* UTF handled below */
11331             *d++ = *s++;
11332         else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
11333             *d++ = ':';
11334             *d++ = ':';
11335             s++;
11336         }
11337         else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
11338             *d++ = *s++;
11339             *d++ = *s++;
11340         }
11341         else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
11342             char *t = s + UTF8SKIP(s);
11343             size_t len;
11344             while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
11345                 t += UTF8SKIP(t);
11346             len = t - s;
11347             if (d + len > e)
11348                 Perl_croak(aTHX_ ident_too_long);
11349             Copy(s, d, len, char);
11350             d += len;
11351             s = t;
11352         }
11353         else {
11354             *d = '\0';
11355             *slp = d - dest;
11356             return s;
11357         }
11358     }
11359 }
11360
11361 STATIC char *
11362 S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
11363 {
11364     dVAR;
11365     char *bracket = NULL;
11366     char funny = *s++;
11367     register char *d = dest;
11368     register char * const e = d + destlen + 3;    /* two-character token, ending NUL */
11369
11370     PERL_ARGS_ASSERT_SCAN_IDENT;
11371
11372     if (isSPACE(*s))
11373         s = PEEKSPACE(s);
11374     if (isDIGIT(*s)) {
11375         while (isDIGIT(*s)) {
11376             if (d >= e)
11377                 Perl_croak(aTHX_ ident_too_long);
11378             *d++ = *s++;
11379         }
11380     }
11381     else {
11382         for (;;) {
11383             if (d >= e)
11384                 Perl_croak(aTHX_ ident_too_long);
11385             if (isALNUM(*s))    /* UTF handled below */
11386                 *d++ = *s++;
11387             else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
11388                 *d++ = ':';
11389                 *d++ = ':';
11390                 s++;
11391             }
11392             else if (*s == ':' && s[1] == ':') {
11393                 *d++ = *s++;
11394                 *d++ = *s++;
11395             }
11396             else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
11397                 char *t = s + UTF8SKIP(s);
11398                 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
11399                     t += UTF8SKIP(t);
11400                 if (d + (t - s) > e)
11401                     Perl_croak(aTHX_ ident_too_long);
11402                 Copy(s, d, t - s, char);
11403                 d += t - s;
11404                 s = t;
11405             }
11406             else
11407                 break;
11408         }
11409     }
11410     *d = '\0';
11411     d = dest;
11412     if (*d) {
11413         if (PL_lex_state != LEX_NORMAL)
11414             PL_lex_state = LEX_INTERPENDMAYBE;
11415         return s;
11416     }
11417     if (*s == '$' && s[1] &&
11418         (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
11419     {
11420         return s;
11421     }
11422     if (*s == '{') {
11423         bracket = s;
11424         s++;
11425     }
11426     else if (ck_uni)
11427         check_uni();
11428     if (s < send)
11429         *d = *s++;
11430     d[1] = '\0';
11431     if (*d == '^' && *s && isCONTROLVAR(*s)) {
11432         *d = toCTRL(*s);
11433         s++;
11434     }
11435     if (bracket) {
11436         if (isSPACE(s[-1])) {
11437             while (s < send) {
11438                 const char ch = *s++;
11439                 if (!SPACE_OR_TAB(ch)) {
11440                     *d = ch;
11441                     break;
11442                 }
11443             }
11444         }
11445         if (isIDFIRST_lazy_if(d,UTF)) {
11446             d++;
11447             if (UTF) {
11448                 char *end = s;
11449                 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
11450                     end += UTF8SKIP(end);
11451                     while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
11452                         end += UTF8SKIP(end);
11453                 }
11454                 Copy(s, d, end - s, char);
11455                 d += end - s;
11456                 s = end;
11457             }
11458             else {
11459                 while ((isALNUM(*s) || *s == ':') && d < e)
11460                     *d++ = *s++;
11461                 if (d >= e)
11462                     Perl_croak(aTHX_ ident_too_long);
11463             }
11464             *d = '\0';
11465             while (s < send && SPACE_OR_TAB(*s))
11466                 s++;
11467             if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
11468                 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
11469                     const char * const brack =
11470                         (const char *)
11471                         ((*s == '[') ? "[...]" : "{...}");
11472                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
11473                         "Ambiguous use of %c{%s%s} resolved to %c%s%s",
11474                         funny, dest, brack, funny, dest, brack);
11475                 }
11476                 bracket++;
11477                 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
11478                 return s;
11479             }
11480         }
11481         /* Handle extended ${^Foo} variables
11482          * 1999-02-27 mjd-perl-patch@plover.com */
11483         else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
11484                  && isALNUM(*s))
11485         {
11486             d++;
11487             while (isALNUM(*s) && d < e) {
11488                 *d++ = *s++;
11489             }
11490             if (d >= e)
11491                 Perl_croak(aTHX_ ident_too_long);
11492             *d = '\0';
11493         }
11494         if (*s == '}') {
11495             s++;
11496             if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
11497                 PL_lex_state = LEX_INTERPEND;
11498                 PL_expect = XREF;
11499             }
11500             if (PL_lex_state == LEX_NORMAL) {
11501                 if (ckWARN(WARN_AMBIGUOUS) &&
11502                     (keyword(dest, d - dest, 0)
11503                      || get_cvn_flags(dest, d - dest, 0)))
11504                 {
11505                     if (funny == '#')
11506                         funny = '@';
11507                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
11508                         "Ambiguous use of %c{%s} resolved to %c%s",
11509                         funny, dest, funny, dest);
11510                 }
11511             }
11512         }
11513         else {
11514             s = bracket;                /* let the parser handle it */
11515             *dest = '\0';
11516         }
11517     }
11518     else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
11519         PL_lex_state = LEX_INTERPEND;
11520     return s;
11521 }
11522
11523 static U32
11524 S_pmflag(U32 pmfl, const char ch) {
11525     switch (ch) {
11526         CASE_STD_PMMOD_FLAGS_PARSE_SET(&pmfl);
11527     case GLOBAL_PAT_MOD:    pmfl |= PMf_GLOBAL; break;
11528     case CONTINUE_PAT_MOD:  pmfl |= PMf_CONTINUE; break;
11529     case ONCE_PAT_MOD:      pmfl |= PMf_KEEP; break;
11530     case KEEPCOPY_PAT_MOD:  pmfl |= PMf_KEEPCOPY; break;
11531     }
11532     return pmfl;
11533 }
11534
11535 void
11536 Perl_pmflag(pTHX_ U32* pmfl, int ch)
11537 {
11538     PERL_ARGS_ASSERT_PMFLAG;
11539
11540     Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED),
11541                      "Perl_pmflag() is deprecated, and will be removed from the XS API");
11542
11543     if (ch<256) {
11544         *pmfl = S_pmflag(*pmfl, (char)ch);
11545     }
11546 }
11547
11548 STATIC char *
11549 S_scan_pat(pTHX_ char *start, I32 type)
11550 {
11551     dVAR;
11552     PMOP *pm;
11553     char *s = scan_str(start,!!PL_madskills,FALSE);
11554     const char * const valid_flags =
11555         (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
11556 #ifdef PERL_MAD
11557     char *modstart;
11558 #endif
11559
11560     PERL_ARGS_ASSERT_SCAN_PAT;
11561
11562     if (!s) {
11563         const char * const delimiter = skipspace(start);
11564         Perl_croak(aTHX_
11565                    (const char *)
11566                    (*delimiter == '?'
11567                     ? "Search pattern not terminated or ternary operator parsed as search pattern"
11568                     : "Search pattern not terminated" ));
11569     }
11570
11571     pm = (PMOP*)newPMOP(type, 0);
11572     if (PL_multi_open == '?') {
11573         /* This is the only point in the code that sets PMf_ONCE:  */
11574         pm->op_pmflags |= PMf_ONCE;
11575
11576         /* Hence it's safe to do this bit of PMOP book-keeping here, which
11577            allows us to restrict the list needed by reset to just the ??
11578            matches.  */
11579         assert(type != OP_TRANS);
11580         if (PL_curstash) {
11581             MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
11582             U32 elements;
11583             if (!mg) {
11584                 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
11585                                  0);
11586             }
11587             elements = mg->mg_len / sizeof(PMOP**);
11588             Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
11589             ((PMOP**)mg->mg_ptr) [elements++] = pm;
11590             mg->mg_len = elements * sizeof(PMOP**);
11591             PmopSTASH_set(pm,PL_curstash);
11592         }
11593     }
11594 #ifdef PERL_MAD
11595     modstart = s;
11596 #endif
11597     while (*s && strchr(valid_flags, *s))
11598         pm->op_pmflags = S_pmflag(pm->op_pmflags, *s++);
11599 #ifdef PERL_MAD
11600     if (PL_madskills && modstart != s) {
11601         SV* tmptoken = newSVpvn(modstart, s - modstart);
11602         append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
11603     }
11604 #endif
11605     /* issue a warning if /c is specified,but /g is not */
11606     if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
11607     {
11608         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
11609                        "Use of /c modifier is meaningless without /g" );
11610     }
11611
11612     PL_lex_op = (OP*)pm;
11613     pl_yylval.ival = OP_MATCH;
11614     return s;
11615 }
11616
11617 STATIC char *
11618 S_scan_subst(pTHX_ char *start)
11619 {
11620     dVAR;
11621     register char *s;
11622     register PMOP *pm;
11623     I32 first_start;
11624     I32 es = 0;
11625 #ifdef PERL_MAD
11626     char *modstart;
11627 #endif
11628
11629     PERL_ARGS_ASSERT_SCAN_SUBST;
11630
11631     pl_yylval.ival = OP_NULL;
11632
11633     s = scan_str(start,!!PL_madskills,FALSE);
11634
11635     if (!s)
11636         Perl_croak(aTHX_ "Substitution pattern not terminated");
11637
11638     if (s[-1] == PL_multi_open)
11639         s--;
11640 #ifdef PERL_MAD
11641     if (PL_madskills) {
11642         CURMAD('q', PL_thisopen);
11643         CURMAD('_', PL_thiswhite);
11644         CURMAD('E', PL_thisstuff);
11645         CURMAD('Q', PL_thisclose);
11646         PL_realtokenstart = s - SvPVX(PL_linestr);
11647     }
11648 #endif
11649
11650     first_start = PL_multi_start;
11651     s = scan_str(s,!!PL_madskills,FALSE);
11652     if (!s) {
11653         if (PL_lex_stuff) {
11654             SvREFCNT_dec(PL_lex_stuff);
11655             PL_lex_stuff = NULL;
11656         }
11657         Perl_croak(aTHX_ "Substitution replacement not terminated");
11658     }
11659     PL_multi_start = first_start;       /* so whole substitution is taken together */
11660
11661     pm = (PMOP*)newPMOP(OP_SUBST, 0);
11662
11663 #ifdef PERL_MAD
11664     if (PL_madskills) {
11665         CURMAD('z', PL_thisopen);
11666         CURMAD('R', PL_thisstuff);
11667         CURMAD('Z', PL_thisclose);
11668     }
11669     modstart = s;
11670 #endif
11671
11672     while (*s) {
11673         if (*s == EXEC_PAT_MOD) {
11674             s++;
11675             es++;
11676         }
11677         else if (strchr(S_PAT_MODS, *s))
11678             pm->op_pmflags = S_pmflag(pm->op_pmflags, *s++);
11679         else
11680             break;
11681     }
11682
11683 #ifdef PERL_MAD
11684     if (PL_madskills) {
11685         if (modstart != s)
11686             curmad('m', newSVpvn(modstart, s - modstart));
11687         append_madprops(PL_thismad, (OP*)pm, 0);
11688         PL_thismad = 0;
11689     }
11690 #endif
11691     if ((pm->op_pmflags & PMf_CONTINUE)) {
11692         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
11693     }
11694
11695     if (es) {
11696         SV * const repl = newSVpvs("");
11697
11698         PL_sublex_info.super_bufptr = s;
11699         PL_sublex_info.super_bufend = PL_bufend;
11700         PL_multi_end = 0;
11701         pm->op_pmflags |= PMf_EVAL;
11702         while (es-- > 0) {
11703             if (es)
11704                 sv_catpvs(repl, "eval ");
11705             else
11706                 sv_catpvs(repl, "do ");
11707         }
11708         sv_catpvs(repl, "{");
11709         sv_catsv(repl, PL_lex_repl);
11710         if (strchr(SvPVX(PL_lex_repl), '#'))
11711             sv_catpvs(repl, "\n");
11712         sv_catpvs(repl, "}");
11713         SvEVALED_on(repl);
11714         SvREFCNT_dec(PL_lex_repl);
11715         PL_lex_repl = repl;
11716     }
11717
11718     PL_lex_op = (OP*)pm;
11719     pl_yylval.ival = OP_SUBST;
11720     return s;
11721 }
11722
11723 STATIC char *
11724 S_scan_trans(pTHX_ char *start)
11725 {
11726     dVAR;
11727     register char* s;
11728     OP *o;
11729     short *tbl;
11730     U8 squash;
11731     U8 del;
11732     U8 complement;
11733 #ifdef PERL_MAD
11734     char *modstart;
11735 #endif
11736
11737     PERL_ARGS_ASSERT_SCAN_TRANS;
11738
11739     pl_yylval.ival = OP_NULL;
11740
11741     s = scan_str(start,!!PL_madskills,FALSE);
11742     if (!s)
11743         Perl_croak(aTHX_ "Transliteration pattern not terminated");
11744
11745     if (s[-1] == PL_multi_open)
11746         s--;
11747 #ifdef PERL_MAD
11748     if (PL_madskills) {
11749         CURMAD('q', PL_thisopen);
11750         CURMAD('_', PL_thiswhite);
11751         CURMAD('E', PL_thisstuff);
11752         CURMAD('Q', PL_thisclose);
11753         PL_realtokenstart = s - SvPVX(PL_linestr);
11754     }
11755 #endif
11756
11757     s = scan_str(s,!!PL_madskills,FALSE);
11758     if (!s) {
11759         if (PL_lex_stuff) {
11760             SvREFCNT_dec(PL_lex_stuff);
11761             PL_lex_stuff = NULL;
11762         }
11763         Perl_croak(aTHX_ "Transliteration replacement not terminated");
11764     }
11765     if (PL_madskills) {
11766         CURMAD('z', PL_thisopen);
11767         CURMAD('R', PL_thisstuff);
11768         CURMAD('Z', PL_thisclose);
11769     }
11770
11771     complement = del = squash = 0;
11772 #ifdef PERL_MAD
11773     modstart = s;
11774 #endif
11775     while (1) {
11776         switch (*s) {
11777         case 'c':
11778             complement = OPpTRANS_COMPLEMENT;
11779             break;
11780         case 'd':
11781             del = OPpTRANS_DELETE;
11782             break;
11783         case 's':
11784             squash = OPpTRANS_SQUASH;
11785             break;
11786         default:
11787             goto no_more;
11788         }
11789         s++;
11790     }
11791   no_more:
11792
11793     tbl = (short *)PerlMemShared_calloc(complement&&!del?258:256, sizeof(short));
11794     o = newPVOP(OP_TRANS, 0, (char*)tbl);
11795     o->op_private &= ~OPpTRANS_ALL;
11796     o->op_private |= del|squash|complement|
11797       (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
11798       (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF   : 0);
11799
11800     PL_lex_op = o;
11801     pl_yylval.ival = OP_TRANS;
11802
11803 #ifdef PERL_MAD
11804     if (PL_madskills) {
11805         if (modstart != s)
11806             curmad('m', newSVpvn(modstart, s - modstart));
11807         append_madprops(PL_thismad, o, 0);
11808         PL_thismad = 0;
11809     }
11810 #endif
11811
11812     return s;
11813 }
11814
11815 STATIC char *
11816 S_scan_heredoc(pTHX_ register char *s)
11817 {
11818     dVAR;
11819     SV *herewas;
11820     I32 op_type = OP_SCALAR;
11821     I32 len;
11822     SV *tmpstr;
11823     char term;
11824     const char *found_newline;
11825     register char *d;
11826     register char *e;
11827     char *peek;
11828     const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
11829 #ifdef PERL_MAD
11830     I32 stuffstart = s - SvPVX(PL_linestr);
11831     char *tstart;
11832
11833     PL_realtokenstart = -1;
11834 #endif
11835
11836     PERL_ARGS_ASSERT_SCAN_HEREDOC;
11837
11838     s += 2;
11839     d = PL_tokenbuf;
11840     e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
11841     if (!outer)
11842         *d++ = '\n';
11843     peek = s;
11844     while (SPACE_OR_TAB(*peek))
11845         peek++;
11846     if (*peek == '`' || *peek == '\'' || *peek =='"') {
11847         s = peek;
11848         term = *s++;
11849         s = delimcpy(d, e, s, PL_bufend, term, &len);
11850         d += len;
11851         if (s < PL_bufend)
11852             s++;
11853     }
11854     else {
11855         if (*s == '\\')
11856             s++, term = '\'';
11857         else
11858             term = '"';
11859         if (!isALNUM_lazy_if(s,UTF))
11860             deprecate("bare << to mean <<\"\"");
11861         for (; isALNUM_lazy_if(s,UTF); s++) {
11862             if (d < e)
11863                 *d++ = *s;
11864         }
11865     }
11866     if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
11867         Perl_croak(aTHX_ "Delimiter for here document is too long");
11868     *d++ = '\n';
11869     *d = '\0';
11870     len = d - PL_tokenbuf;
11871
11872 #ifdef PERL_MAD
11873     if (PL_madskills) {
11874         tstart = PL_tokenbuf + !outer;
11875         PL_thisclose = newSVpvn(tstart, len - !outer);
11876         tstart = SvPVX(PL_linestr) + stuffstart;
11877         PL_thisopen = newSVpvn(tstart, s - tstart);
11878         stuffstart = s - SvPVX(PL_linestr);
11879     }
11880 #endif
11881 #ifndef PERL_STRICT_CR
11882     d = strchr(s, '\r');
11883     if (d) {
11884         char * const olds = s;
11885         s = d;
11886         while (s < PL_bufend) {
11887             if (*s == '\r') {
11888                 *d++ = '\n';
11889                 if (*++s == '\n')
11890                     s++;
11891             }
11892             else if (*s == '\n' && s[1] == '\r') {      /* \015\013 on a mac? */
11893                 *d++ = *s++;
11894                 s++;
11895             }
11896             else
11897                 *d++ = *s++;
11898         }
11899         *d = '\0';
11900         PL_bufend = d;
11901         SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
11902         s = olds;
11903     }
11904 #endif
11905 #ifdef PERL_MAD
11906     found_newline = 0;
11907 #endif
11908     if ( outer || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s)) ) {
11909         herewas = newSVpvn(s,PL_bufend-s);
11910     }
11911     else {
11912 #ifdef PERL_MAD
11913         herewas = newSVpvn(s-1,found_newline-s+1);
11914 #else
11915         s--;
11916         herewas = newSVpvn(s,found_newline-s);
11917 #endif
11918     }
11919 #ifdef PERL_MAD
11920     if (PL_madskills) {
11921         tstart = SvPVX(PL_linestr) + stuffstart;
11922         if (PL_thisstuff)
11923             sv_catpvn(PL_thisstuff, tstart, s - tstart);
11924         else
11925             PL_thisstuff = newSVpvn(tstart, s - tstart);
11926     }
11927 #endif
11928     s += SvCUR(herewas);
11929
11930 #ifdef PERL_MAD
11931     stuffstart = s - SvPVX(PL_linestr);
11932
11933     if (found_newline)
11934         s--;
11935 #endif
11936
11937     tmpstr = newSV_type(SVt_PVIV);
11938     SvGROW(tmpstr, 80);
11939     if (term == '\'') {
11940         op_type = OP_CONST;
11941         SvIV_set(tmpstr, -1);
11942     }
11943     else if (term == '`') {
11944         op_type = OP_BACKTICK;
11945         SvIV_set(tmpstr, '\\');
11946     }
11947
11948     CLINE;
11949     PL_multi_start = CopLINE(PL_curcop);
11950     PL_multi_open = PL_multi_close = '<';
11951     term = *PL_tokenbuf;
11952     if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
11953         char * const bufptr = PL_sublex_info.super_bufptr;
11954         char * const bufend = PL_sublex_info.super_bufend;
11955         char * const olds = s - SvCUR(herewas);
11956         s = strchr(bufptr, '\n');
11957         if (!s)
11958             s = bufend;
11959         d = s;
11960         while (s < bufend &&
11961           (*s != term || memNE(s,PL_tokenbuf,len)) ) {
11962             if (*s++ == '\n')
11963                 CopLINE_inc(PL_curcop);
11964         }
11965         if (s >= bufend) {
11966             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11967             missingterm(PL_tokenbuf);
11968         }
11969         sv_setpvn(herewas,bufptr,d-bufptr+1);
11970         sv_setpvn(tmpstr,d+1,s-d);
11971         s += len - 1;
11972         sv_catpvn(herewas,s,bufend-s);
11973         Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
11974
11975         s = olds;
11976         goto retval;
11977     }
11978     else if (!outer) {
11979         d = s;
11980         while (s < PL_bufend &&
11981           (*s != term || memNE(s,PL_tokenbuf,len)) ) {
11982             if (*s++ == '\n')
11983                 CopLINE_inc(PL_curcop);
11984         }
11985         if (s >= PL_bufend) {
11986             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11987             missingterm(PL_tokenbuf);
11988         }
11989         sv_setpvn(tmpstr,d+1,s-d);
11990 #ifdef PERL_MAD
11991         if (PL_madskills) {
11992             if (PL_thisstuff)
11993                 sv_catpvn(PL_thisstuff, d + 1, s - d);
11994             else
11995                 PL_thisstuff = newSVpvn(d + 1, s - d);
11996             stuffstart = s - SvPVX(PL_linestr);
11997         }
11998 #endif
11999         s += len - 1;
12000         CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
12001
12002         sv_catpvn(herewas,s,PL_bufend-s);
12003         sv_setsv(PL_linestr,herewas);
12004         PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
12005         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
12006         PL_last_lop = PL_last_uni = NULL;
12007     }
12008     else
12009         sv_setpvs(tmpstr,"");   /* avoid "uninitialized" warning */
12010     while (s >= PL_bufend) {    /* multiple line string? */
12011 #ifdef PERL_MAD
12012         if (PL_madskills) {
12013             tstart = SvPVX(PL_linestr) + stuffstart;
12014             if (PL_thisstuff)
12015                 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
12016             else
12017                 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
12018         }
12019 #endif
12020         PL_bufptr = s;
12021         CopLINE_inc(PL_curcop);
12022         if (!outer || !lex_next_chunk(0)) {
12023             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
12024             missingterm(PL_tokenbuf);
12025         }
12026         CopLINE_dec(PL_curcop);
12027         s = PL_bufptr;
12028 #ifdef PERL_MAD
12029         stuffstart = s - SvPVX(PL_linestr);
12030 #endif
12031         CopLINE_inc(PL_curcop);
12032         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
12033         PL_last_lop = PL_last_uni = NULL;
12034 #ifndef PERL_STRICT_CR
12035         if (PL_bufend - PL_linestart >= 2) {
12036             if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
12037                 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
12038             {
12039                 PL_bufend[-2] = '\n';
12040                 PL_bufend--;
12041                 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
12042             }
12043             else if (PL_bufend[-1] == '\r')
12044                 PL_bufend[-1] = '\n';
12045         }
12046         else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
12047             PL_bufend[-1] = '\n';
12048 #endif
12049         if (*s == term && memEQ(s,PL_tokenbuf,len)) {
12050             STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
12051             *(SvPVX(PL_linestr) + off ) = ' ';
12052             sv_catsv(PL_linestr,herewas);
12053             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
12054             s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
12055         }
12056         else {
12057             s = PL_bufend;
12058             sv_catsv(tmpstr,PL_linestr);
12059         }
12060     }
12061     s++;
12062 retval:
12063     PL_multi_end = CopLINE(PL_curcop);
12064     if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
12065         SvPV_shrink_to_cur(tmpstr);
12066     }
12067     SvREFCNT_dec(herewas);
12068     if (!IN_BYTES) {
12069         if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
12070             SvUTF8_on(tmpstr);
12071         else if (PL_encoding)
12072             sv_recode_to_utf8(tmpstr, PL_encoding);
12073     }
12074     PL_lex_stuff = tmpstr;
12075     pl_yylval.ival = op_type;
12076     return s;
12077 }
12078
12079 /* scan_inputsymbol
12080    takes: current position in input buffer
12081    returns: new position in input buffer
12082    side-effects: pl_yylval and lex_op are set.
12083
12084    This code handles:
12085
12086    <>           read from ARGV
12087    <FH>         read from filehandle
12088    <pkg::FH>    read from package qualified filehandle
12089    <pkg'FH>     read from package qualified filehandle
12090    <$fh>        read from filehandle in $fh
12091    <*.h>        filename glob
12092
12093 */
12094
12095 STATIC char *
12096 S_scan_inputsymbol(pTHX_ char *start)
12097 {
12098     dVAR;
12099     register char *s = start;           /* current position in buffer */
12100     char *end;
12101     I32 len;
12102     char *d = PL_tokenbuf;                                      /* start of temp holding space */
12103     const char * const e = PL_tokenbuf + sizeof PL_tokenbuf;    /* end of temp holding space */
12104
12105     PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
12106
12107     end = strchr(s, '\n');
12108     if (!end)
12109         end = PL_bufend;
12110     s = delimcpy(d, e, s + 1, end, '>', &len);  /* extract until > */
12111
12112     /* die if we didn't have space for the contents of the <>,
12113        or if it didn't end, or if we see a newline
12114     */
12115
12116     if (len >= (I32)sizeof PL_tokenbuf)
12117         Perl_croak(aTHX_ "Excessively long <> operator");
12118     if (s >= end)
12119         Perl_croak(aTHX_ "Unterminated <> operator");
12120
12121     s++;
12122
12123     /* check for <$fh>
12124        Remember, only scalar variables are interpreted as filehandles by
12125        this code.  Anything more complex (e.g., <$fh{$num}>) will be
12126        treated as a glob() call.
12127        This code makes use of the fact that except for the $ at the front,
12128        a scalar variable and a filehandle look the same.
12129     */
12130     if (*d == '$' && d[1]) d++;
12131
12132     /* allow <Pkg'VALUE> or <Pkg::VALUE> */
12133     while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
12134         d++;
12135
12136     /* If we've tried to read what we allow filehandles to look like, and
12137        there's still text left, then it must be a glob() and not a getline.
12138        Use scan_str to pull out the stuff between the <> and treat it
12139        as nothing more than a string.
12140     */
12141
12142     if (d - PL_tokenbuf != len) {
12143         pl_yylval.ival = OP_GLOB;
12144         s = scan_str(start,!!PL_madskills,FALSE);
12145         if (!s)
12146            Perl_croak(aTHX_ "Glob not terminated");
12147         return s;
12148     }
12149     else {
12150         bool readline_overriden = FALSE;
12151         GV *gv_readline;
12152         GV **gvp;
12153         /* we're in a filehandle read situation */
12154         d = PL_tokenbuf;
12155
12156         /* turn <> into <ARGV> */
12157         if (!len)
12158             Copy("ARGV",d,5,char);
12159
12160         /* Check whether readline() is overriden */
12161         gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
12162         if ((gv_readline
12163                 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
12164                 ||
12165                 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
12166                  && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
12167                 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
12168             readline_overriden = TRUE;
12169
12170         /* if <$fh>, create the ops to turn the variable into a
12171            filehandle
12172         */
12173         if (*d == '$') {
12174             /* try to find it in the pad for this block, otherwise find
12175                add symbol table ops
12176             */
12177             const PADOFFSET tmp = pad_findmy(d, len, 0);
12178             if (tmp != NOT_IN_PAD) {
12179                 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
12180                     HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
12181                     HEK * const stashname = HvNAME_HEK(stash);
12182                     SV * const sym = sv_2mortal(newSVhek(stashname));
12183                     sv_catpvs(sym, "::");
12184                     sv_catpv(sym, d+1);
12185                     d = SvPVX(sym);
12186                     goto intro_sym;
12187                 }
12188                 else {
12189                     OP * const o = newOP(OP_PADSV, 0);
12190                     o->op_targ = tmp;
12191                     PL_lex_op = readline_overriden
12192                         ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12193                                 append_elem(OP_LIST, o,
12194                                     newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
12195                         : (OP*)newUNOP(OP_READLINE, 0, o);
12196                 }
12197             }
12198             else {
12199                 GV *gv;
12200                 ++d;
12201 intro_sym:
12202                 gv = gv_fetchpv(d,
12203                                 (PL_in_eval
12204                                  ? (GV_ADDMULTI | GV_ADDINEVAL)
12205                                  : GV_ADDMULTI),
12206                                 SVt_PV);
12207                 PL_lex_op = readline_overriden
12208                     ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12209                             append_elem(OP_LIST,
12210                                 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
12211                                 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
12212                     : (OP*)newUNOP(OP_READLINE, 0,
12213                             newUNOP(OP_RV2SV, 0,
12214                                 newGVOP(OP_GV, 0, gv)));
12215             }
12216             if (!readline_overriden)
12217                 PL_lex_op->op_flags |= OPf_SPECIAL;
12218             /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
12219             pl_yylval.ival = OP_NULL;
12220         }
12221
12222         /* If it's none of the above, it must be a literal filehandle
12223            (<Foo::BAR> or <FOO>) so build a simple readline OP */
12224         else {
12225             GV * const gv = gv_fetchpv(d, GV_ADD, SVt_PVIO);
12226             PL_lex_op = readline_overriden
12227                 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12228                         append_elem(OP_LIST,
12229                             newGVOP(OP_GV, 0, gv),
12230                             newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
12231                 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
12232             pl_yylval.ival = OP_NULL;
12233         }
12234     }
12235
12236     return s;
12237 }
12238
12239
12240 /* scan_str
12241    takes: start position in buffer
12242           keep_quoted preserve \ on the embedded delimiter(s)
12243           keep_delims preserve the delimiters around the string
12244    returns: position to continue reading from buffer
12245    side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
12246         updates the read buffer.
12247
12248    This subroutine pulls a string out of the input.  It is called for:
12249         q               single quotes           q(literal text)
12250         '               single quotes           'literal text'
12251         qq              double quotes           qq(interpolate $here please)
12252         "               double quotes           "interpolate $here please"
12253         qx              backticks               qx(/bin/ls -l)
12254         `               backticks               `/bin/ls -l`
12255         qw              quote words             @EXPORT_OK = qw( func() $spam )
12256         m//             regexp match            m/this/
12257         s///            regexp substitute       s/this/that/
12258         tr///           string transliterate    tr/this/that/
12259         y///            string transliterate    y/this/that/
12260         ($*@)           sub prototypes          sub foo ($)
12261         (stuff)         sub attr parameters     sub foo : attr(stuff)
12262         <>              readline or globs       <FOO>, <>, <$fh>, or <*.c>
12263
12264    In most of these cases (all but <>, patterns and transliterate)
12265    yylex() calls scan_str().  m// makes yylex() call scan_pat() which
12266    calls scan_str().  s/// makes yylex() call scan_subst() which calls
12267    scan_str().  tr/// and y/// make yylex() call scan_trans() which
12268    calls scan_str().
12269
12270    It skips whitespace before the string starts, and treats the first
12271    character as the delimiter.  If the delimiter is one of ([{< then
12272    the corresponding "close" character )]}> is used as the closing
12273    delimiter.  It allows quoting of delimiters, and if the string has
12274    balanced delimiters ([{<>}]) it allows nesting.
12275
12276    On success, the SV with the resulting string is put into lex_stuff or,
12277    if that is already non-NULL, into lex_repl. The second case occurs only
12278    when parsing the RHS of the special constructs s/// and tr/// (y///).
12279    For convenience, the terminating delimiter character is stuffed into
12280    SvIVX of the SV.
12281 */
12282
12283 STATIC char *
12284 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
12285 {
12286     dVAR;
12287     SV *sv;                             /* scalar value: string */
12288     const char *tmps;                   /* temp string, used for delimiter matching */
12289     register char *s = start;           /* current position in the buffer */
12290     register char term;                 /* terminating character */
12291     register char *to;                  /* current position in the sv's data */
12292     I32 brackets = 1;                   /* bracket nesting level */
12293     bool has_utf8 = FALSE;              /* is there any utf8 content? */
12294     I32 termcode;                       /* terminating char. code */
12295     U8 termstr[UTF8_MAXBYTES];          /* terminating string */
12296     STRLEN termlen;                     /* length of terminating string */
12297     int last_off = 0;                   /* last position for nesting bracket */
12298 #ifdef PERL_MAD
12299     int stuffstart;
12300     char *tstart;
12301 #endif
12302
12303     PERL_ARGS_ASSERT_SCAN_STR;
12304
12305     /* skip space before the delimiter */
12306     if (isSPACE(*s)) {
12307         s = PEEKSPACE(s);
12308     }
12309
12310 #ifdef PERL_MAD
12311     if (PL_realtokenstart >= 0) {
12312         stuffstart = PL_realtokenstart;
12313         PL_realtokenstart = -1;
12314     }
12315     else
12316         stuffstart = start - SvPVX(PL_linestr);
12317 #endif
12318     /* mark where we are, in case we need to report errors */
12319     CLINE;
12320
12321     /* after skipping whitespace, the next character is the terminator */
12322     term = *s;
12323     if (!UTF) {
12324         termcode = termstr[0] = term;
12325         termlen = 1;
12326     }
12327     else {
12328         termcode = utf8_to_uvchr((U8*)s, &termlen);
12329         Copy(s, termstr, termlen, U8);
12330         if (!UTF8_IS_INVARIANT(term))
12331             has_utf8 = TRUE;
12332     }
12333
12334     /* mark where we are */
12335     PL_multi_start = CopLINE(PL_curcop);
12336     PL_multi_open = term;
12337
12338     /* find corresponding closing delimiter */
12339     if (term && (tmps = strchr("([{< )]}> )]}>",term)))
12340         termcode = termstr[0] = term = tmps[5];
12341
12342     PL_multi_close = term;
12343
12344     /* create a new SV to hold the contents.  79 is the SV's initial length.
12345        What a random number. */
12346     sv = newSV_type(SVt_PVIV);
12347     SvGROW(sv, 80);
12348     SvIV_set(sv, termcode);
12349     (void)SvPOK_only(sv);               /* validate pointer */
12350
12351     /* move past delimiter and try to read a complete string */
12352     if (keep_delims)
12353         sv_catpvn(sv, s, termlen);
12354     s += termlen;
12355 #ifdef PERL_MAD
12356     tstart = SvPVX(PL_linestr) + stuffstart;
12357     if (!PL_thisopen && !keep_delims) {
12358         PL_thisopen = newSVpvn(tstart, s - tstart);
12359         stuffstart = s - SvPVX(PL_linestr);
12360     }
12361 #endif
12362     for (;;) {
12363         if (PL_encoding && !UTF) {
12364             bool cont = TRUE;
12365
12366             while (cont) {
12367                 int offset = s - SvPVX_const(PL_linestr);
12368                 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
12369                                            &offset, (char*)termstr, termlen);
12370                 const char * const ns = SvPVX_const(PL_linestr) + offset;
12371                 char * const svlast = SvEND(sv) - 1;
12372
12373                 for (; s < ns; s++) {
12374                     if (*s == '\n' && !PL_rsfp)
12375                         CopLINE_inc(PL_curcop);
12376                 }
12377                 if (!found)
12378                     goto read_more_line;
12379                 else {
12380                     /* handle quoted delimiters */
12381                     if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
12382                         const char *t;
12383                         for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
12384                             t--;
12385                         if ((svlast-1 - t) % 2) {
12386                             if (!keep_quoted) {
12387                                 *(svlast-1) = term;
12388                                 *svlast = '\0';
12389                                 SvCUR_set(sv, SvCUR(sv) - 1);
12390                             }
12391                             continue;
12392                         }
12393                     }
12394                     if (PL_multi_open == PL_multi_close) {
12395                         cont = FALSE;
12396                     }
12397                     else {
12398                         const char *t;
12399                         char *w;
12400                         for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
12401                             /* At here, all closes are "was quoted" one,
12402                                so we don't check PL_multi_close. */
12403                             if (*t == '\\') {
12404                                 if (!keep_quoted && *(t+1) == PL_multi_open)
12405                                     t++;
12406                                 else
12407                                     *w++ = *t++;
12408                             }
12409                             else if (*t == PL_multi_open)
12410                                 brackets++;
12411
12412                             *w = *t;
12413                         }
12414                         if (w < t) {
12415                             *w++ = term;
12416                             *w = '\0';
12417                             SvCUR_set(sv, w - SvPVX_const(sv));
12418                         }
12419                         last_off = w - SvPVX(sv);
12420                         if (--brackets <= 0)
12421                             cont = FALSE;
12422                     }
12423                 }
12424             }
12425             if (!keep_delims) {
12426                 SvCUR_set(sv, SvCUR(sv) - 1);
12427                 *SvEND(sv) = '\0';
12428             }
12429             break;
12430         }
12431
12432         /* extend sv if need be */
12433         SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
12434         /* set 'to' to the next character in the sv's string */
12435         to = SvPVX(sv)+SvCUR(sv);
12436
12437         /* if open delimiter is the close delimiter read unbridle */
12438         if (PL_multi_open == PL_multi_close) {
12439             for (; s < PL_bufend; s++,to++) {
12440                 /* embedded newlines increment the current line number */
12441                 if (*s == '\n' && !PL_rsfp)
12442                     CopLINE_inc(PL_curcop);
12443                 /* handle quoted delimiters */
12444                 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
12445                     if (!keep_quoted && s[1] == term)
12446                         s++;
12447                 /* any other quotes are simply copied straight through */
12448                     else
12449                         *to++ = *s++;
12450                 }
12451                 /* terminate when run out of buffer (the for() condition), or
12452                    have found the terminator */
12453                 else if (*s == term) {
12454                     if (termlen == 1)
12455                         break;
12456                     if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
12457                         break;
12458                 }
12459                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
12460                     has_utf8 = TRUE;
12461                 *to = *s;
12462             }
12463         }
12464
12465         /* if the terminator isn't the same as the start character (e.g.,
12466            matched brackets), we have to allow more in the quoting, and
12467            be prepared for nested brackets.
12468         */
12469         else {
12470             /* read until we run out of string, or we find the terminator */
12471             for (; s < PL_bufend; s++,to++) {
12472                 /* embedded newlines increment the line count */
12473                 if (*s == '\n' && !PL_rsfp)
12474                     CopLINE_inc(PL_curcop);
12475                 /* backslashes can escape the open or closing characters */
12476                 if (*s == '\\' && s+1 < PL_bufend) {
12477                     if (!keep_quoted &&
12478                         ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
12479                         s++;
12480                     else
12481                         *to++ = *s++;
12482                 }
12483                 /* allow nested opens and closes */
12484                 else if (*s == PL_multi_close && --brackets <= 0)
12485                     break;
12486                 else if (*s == PL_multi_open)
12487                     brackets++;
12488                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
12489                     has_utf8 = TRUE;
12490                 *to = *s;
12491             }
12492         }
12493         /* terminate the copied string and update the sv's end-of-string */
12494         *to = '\0';
12495         SvCUR_set(sv, to - SvPVX_const(sv));
12496
12497         /*
12498          * this next chunk reads more into the buffer if we're not done yet
12499          */
12500
12501         if (s < PL_bufend)
12502             break;              /* handle case where we are done yet :-) */
12503
12504 #ifndef PERL_STRICT_CR
12505         if (to - SvPVX_const(sv) >= 2) {
12506             if ((to[-2] == '\r' && to[-1] == '\n') ||
12507                 (to[-2] == '\n' && to[-1] == '\r'))
12508             {
12509                 to[-2] = '\n';
12510                 to--;
12511                 SvCUR_set(sv, to - SvPVX_const(sv));
12512             }
12513             else if (to[-1] == '\r')
12514                 to[-1] = '\n';
12515         }
12516         else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
12517             to[-1] = '\n';
12518 #endif
12519
12520      read_more_line:
12521         /* if we're out of file, or a read fails, bail and reset the current
12522            line marker so we can report where the unterminated string began
12523         */
12524 #ifdef PERL_MAD
12525         if (PL_madskills) {
12526             char * const tstart = SvPVX(PL_linestr) + stuffstart;
12527             if (PL_thisstuff)
12528                 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
12529             else
12530                 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
12531         }
12532 #endif
12533         CopLINE_inc(PL_curcop);
12534         PL_bufptr = PL_bufend;
12535         if (!lex_next_chunk(0)) {
12536             sv_free(sv);
12537             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
12538             return NULL;
12539         }
12540         s = PL_bufptr;
12541 #ifdef PERL_MAD
12542         stuffstart = 0;
12543 #endif
12544     }
12545
12546     /* at this point, we have successfully read the delimited string */
12547
12548     if (!PL_encoding || UTF) {
12549 #ifdef PERL_MAD
12550         if (PL_madskills) {
12551             char * const tstart = SvPVX(PL_linestr) + stuffstart;
12552             const int len = s - tstart;
12553             if (PL_thisstuff)
12554                 sv_catpvn(PL_thisstuff, tstart, len);
12555             else
12556                 PL_thisstuff = newSVpvn(tstart, len);
12557             if (!PL_thisclose && !keep_delims)
12558                 PL_thisclose = newSVpvn(s,termlen);
12559         }
12560 #endif
12561
12562         if (keep_delims)
12563             sv_catpvn(sv, s, termlen);
12564         s += termlen;
12565     }
12566 #ifdef PERL_MAD
12567     else {
12568         if (PL_madskills) {
12569             char * const tstart = SvPVX(PL_linestr) + stuffstart;
12570             const int len = s - tstart - termlen;
12571             if (PL_thisstuff)
12572                 sv_catpvn(PL_thisstuff, tstart, len);
12573             else
12574                 PL_thisstuff = newSVpvn(tstart, len);
12575             if (!PL_thisclose && !keep_delims)
12576                 PL_thisclose = newSVpvn(s - termlen,termlen);
12577         }
12578     }
12579 #endif
12580     if (has_utf8 || PL_encoding)
12581         SvUTF8_on(sv);
12582
12583     PL_multi_end = CopLINE(PL_curcop);
12584
12585     /* if we allocated too much space, give some back */
12586     if (SvCUR(sv) + 5 < SvLEN(sv)) {
12587         SvLEN_set(sv, SvCUR(sv) + 1);
12588         SvPV_renew(sv, SvLEN(sv));
12589     }
12590
12591     /* decide whether this is the first or second quoted string we've read
12592        for this op
12593     */
12594
12595     if (PL_lex_stuff)
12596         PL_lex_repl = sv;
12597     else
12598         PL_lex_stuff = sv;
12599     return s;
12600 }
12601
12602 /*
12603   scan_num
12604   takes: pointer to position in buffer
12605   returns: pointer to new position in buffer
12606   side-effects: builds ops for the constant in pl_yylval.op
12607
12608   Read a number in any of the formats that Perl accepts:
12609
12610   \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*)      12 12.34 12.
12611   \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*)                     .34
12612   0b[01](_?[01])*
12613   0[0-7](_?[0-7])*
12614   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
12615
12616   Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
12617   thing it reads.
12618
12619   If it reads a number without a decimal point or an exponent, it will
12620   try converting the number to an integer and see if it can do so
12621   without loss of precision.
12622 */
12623
12624 char *
12625 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
12626 {
12627     dVAR;
12628     register const char *s = start;     /* current position in buffer */
12629     register char *d;                   /* destination in temp buffer */
12630     register char *e;                   /* end of temp buffer */
12631     NV nv;                              /* number read, as a double */
12632     SV *sv = NULL;                      /* place to put the converted number */
12633     bool floatit;                       /* boolean: int or float? */
12634     const char *lastub = NULL;          /* position of last underbar */
12635     static char const number_too_long[] = "Number too long";
12636
12637     PERL_ARGS_ASSERT_SCAN_NUM;
12638
12639     /* We use the first character to decide what type of number this is */
12640
12641     switch (*s) {
12642     default:
12643       Perl_croak(aTHX_ "panic: scan_num");
12644
12645     /* if it starts with a 0, it could be an octal number, a decimal in
12646        0.13 disguise, or a hexadecimal number, or a binary number. */
12647     case '0':
12648         {
12649           /* variables:
12650              u          holds the "number so far"
12651              shift      the power of 2 of the base
12652                         (hex == 4, octal == 3, binary == 1)
12653              overflowed was the number more than we can hold?
12654
12655              Shift is used when we add a digit.  It also serves as an "are
12656              we in octal/hex/binary?" indicator to disallow hex characters
12657              when in octal mode.
12658            */
12659             NV n = 0.0;
12660             UV u = 0;
12661             I32 shift;
12662             bool overflowed = FALSE;
12663             bool just_zero  = TRUE;     /* just plain 0 or binary number? */
12664             static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
12665             static const char* const bases[5] =
12666               { "", "binary", "", "octal", "hexadecimal" };
12667             static const char* const Bases[5] =
12668               { "", "Binary", "", "Octal", "Hexadecimal" };
12669             static const char* const maxima[5] =
12670               { "",
12671                 "0b11111111111111111111111111111111",
12672                 "",
12673                 "037777777777",
12674                 "0xffffffff" };
12675             const char *base, *Base, *max;
12676
12677             /* check for hex */
12678             if (s[1] == 'x') {
12679                 shift = 4;
12680                 s += 2;
12681                 just_zero = FALSE;
12682             } else if (s[1] == 'b') {
12683                 shift = 1;
12684                 s += 2;
12685                 just_zero = FALSE;
12686             }
12687             /* check for a decimal in disguise */
12688             else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
12689                 goto decimal;
12690             /* so it must be octal */
12691             else {
12692                 shift = 3;
12693                 s++;
12694             }
12695
12696             if (*s == '_') {
12697                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12698                                "Misplaced _ in number");
12699                lastub = s++;
12700             }
12701
12702             base = bases[shift];
12703             Base = Bases[shift];
12704             max  = maxima[shift];
12705
12706             /* read the rest of the number */
12707             for (;;) {
12708                 /* x is used in the overflow test,
12709                    b is the digit we're adding on. */
12710                 UV x, b;
12711
12712                 switch (*s) {
12713
12714                 /* if we don't mention it, we're done */
12715                 default:
12716                     goto out;
12717
12718                 /* _ are ignored -- but warned about if consecutive */
12719                 case '_':
12720                     if (lastub && s == lastub + 1)
12721                         Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12722                                        "Misplaced _ in number");
12723                     lastub = s++;
12724                     break;
12725
12726                 /* 8 and 9 are not octal */
12727                 case '8': case '9':
12728                     if (shift == 3)
12729                         yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
12730                     /* FALL THROUGH */
12731
12732                 /* octal digits */
12733                 case '2': case '3': case '4':
12734                 case '5': case '6': case '7':
12735                     if (shift == 1)
12736                         yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
12737                     /* FALL THROUGH */
12738
12739                 case '0': case '1':
12740                     b = *s++ & 15;              /* ASCII digit -> value of digit */
12741                     goto digit;
12742
12743                 /* hex digits */
12744                 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
12745                 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
12746                     /* make sure they said 0x */
12747                     if (shift != 4)
12748                         goto out;
12749                     b = (*s++ & 7) + 9;
12750
12751                     /* Prepare to put the digit we have onto the end
12752                        of the number so far.  We check for overflows.
12753                     */
12754
12755                   digit:
12756                     just_zero = FALSE;
12757                     if (!overflowed) {
12758                         x = u << shift; /* make room for the digit */
12759
12760                         if ((x >> shift) != u
12761                             && !(PL_hints & HINT_NEW_BINARY)) {
12762                             overflowed = TRUE;
12763                             n = (NV) u;
12764                             Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
12765                                              "Integer overflow in %s number",
12766                                              base);
12767                         } else
12768                             u = x | b;          /* add the digit to the end */
12769                     }
12770                     if (overflowed) {
12771                         n *= nvshift[shift];
12772                         /* If an NV has not enough bits in its
12773                          * mantissa to represent an UV this summing of
12774                          * small low-order numbers is a waste of time
12775                          * (because the NV cannot preserve the
12776                          * low-order bits anyway): we could just
12777                          * remember when did we overflow and in the
12778                          * end just multiply n by the right
12779                          * amount. */
12780                         n += (NV) b;
12781                     }
12782                     break;
12783                 }
12784             }
12785
12786           /* if we get here, we had success: make a scalar value from
12787              the number.
12788           */
12789           out:
12790
12791             /* final misplaced underbar check */
12792             if (s[-1] == '_') {
12793                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
12794             }
12795
12796             sv = newSV(0);
12797             if (overflowed) {
12798                 if (n > 4294967295.0)
12799                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
12800                                    "%s number > %s non-portable",
12801                                    Base, max);
12802                 sv_setnv(sv, n);
12803             }
12804             else {
12805 #if UVSIZE > 4
12806                 if (u > 0xffffffff)
12807                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
12808                                    "%s number > %s non-portable",
12809                                    Base, max);
12810 #endif
12811                 sv_setuv(sv, u);
12812             }
12813             if (just_zero && (PL_hints & HINT_NEW_INTEGER))
12814                 sv = new_constant(start, s - start, "integer",
12815                                   sv, NULL, NULL, 0);
12816             else if (PL_hints & HINT_NEW_BINARY)
12817                 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
12818         }
12819         break;
12820
12821     /*
12822       handle decimal numbers.
12823       we're also sent here when we read a 0 as the first digit
12824     */
12825     case '1': case '2': case '3': case '4': case '5':
12826     case '6': case '7': case '8': case '9': case '.':
12827       decimal:
12828         d = PL_tokenbuf;
12829         e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
12830         floatit = FALSE;
12831
12832         /* read next group of digits and _ and copy into d */
12833         while (isDIGIT(*s) || *s == '_') {
12834             /* skip underscores, checking for misplaced ones
12835                if -w is on
12836             */
12837             if (*s == '_') {
12838                 if (lastub && s == lastub + 1)
12839                     Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12840                                    "Misplaced _ in number");
12841                 lastub = s++;
12842             }
12843             else {
12844                 /* check for end of fixed-length buffer */
12845                 if (d >= e)
12846                     Perl_croak(aTHX_ number_too_long);
12847                 /* if we're ok, copy the character */
12848                 *d++ = *s++;
12849             }
12850         }
12851
12852         /* final misplaced underbar check */
12853         if (lastub && s == lastub + 1) {
12854             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
12855         }
12856
12857         /* read a decimal portion if there is one.  avoid
12858            3..5 being interpreted as the number 3. followed
12859            by .5
12860         */
12861         if (*s == '.' && s[1] != '.') {
12862             floatit = TRUE;
12863             *d++ = *s++;
12864
12865             if (*s == '_') {
12866                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12867                                "Misplaced _ in number");
12868                 lastub = s;
12869             }
12870
12871             /* copy, ignoring underbars, until we run out of digits.
12872             */
12873             for (; isDIGIT(*s) || *s == '_'; s++) {
12874                 /* fixed length buffer check */
12875                 if (d >= e)
12876                     Perl_croak(aTHX_ number_too_long);
12877                 if (*s == '_') {
12878                    if (lastub && s == lastub + 1)
12879                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12880                                       "Misplaced _ in number");
12881                    lastub = s;
12882                 }
12883                 else
12884                     *d++ = *s;
12885             }
12886             /* fractional part ending in underbar? */
12887             if (s[-1] == '_') {
12888                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12889                                "Misplaced _ in number");
12890             }
12891             if (*s == '.' && isDIGIT(s[1])) {
12892                 /* oops, it's really a v-string, but without the "v" */
12893                 s = start;
12894                 goto vstring;
12895             }
12896         }
12897
12898         /* read exponent part, if present */
12899         if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
12900             floatit = TRUE;
12901             s++;
12902
12903             /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
12904             *d++ = 'e';         /* At least some Mach atof()s don't grok 'E' */
12905
12906             /* stray preinitial _ */
12907             if (*s == '_') {
12908                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12909                                "Misplaced _ in number");
12910                 lastub = s++;
12911             }
12912
12913             /* allow positive or negative exponent */
12914             if (*s == '+' || *s == '-')
12915                 *d++ = *s++;
12916
12917             /* stray initial _ */
12918             if (*s == '_') {
12919                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12920                                "Misplaced _ in number");
12921                 lastub = s++;
12922             }
12923
12924             /* read digits of exponent */
12925             while (isDIGIT(*s) || *s == '_') {
12926                 if (isDIGIT(*s)) {
12927                     if (d >= e)
12928                         Perl_croak(aTHX_ number_too_long);
12929                     *d++ = *s++;
12930                 }
12931                 else {
12932                    if (((lastub && s == lastub + 1) ||
12933                         (!isDIGIT(s[1]) && s[1] != '_')))
12934                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12935                                       "Misplaced _ in number");
12936                    lastub = s++;
12937                 }
12938             }
12939         }
12940
12941
12942         /* make an sv from the string */
12943         sv = newSV(0);
12944
12945         /*
12946            We try to do an integer conversion first if no characters
12947            indicating "float" have been found.
12948          */
12949
12950         if (!floatit) {
12951             UV uv;
12952             const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
12953
12954             if (flags == IS_NUMBER_IN_UV) {
12955               if (uv <= IV_MAX)
12956                 sv_setiv(sv, uv); /* Prefer IVs over UVs. */
12957               else
12958                 sv_setuv(sv, uv);
12959             } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
12960               if (uv <= (UV) IV_MIN)
12961                 sv_setiv(sv, -(IV)uv);
12962               else
12963                 floatit = TRUE;
12964             } else
12965               floatit = TRUE;
12966         }
12967         if (floatit) {
12968             /* terminate the string */
12969             *d = '\0';
12970             nv = Atof(PL_tokenbuf);
12971             sv_setnv(sv, nv);
12972         }
12973
12974         if ( floatit
12975              ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
12976             const char *const key = floatit ? "float" : "integer";
12977             const STRLEN keylen = floatit ? 5 : 7;
12978             sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
12979                                 key, keylen, sv, NULL, NULL, 0);
12980         }
12981         break;
12982
12983     /* if it starts with a v, it could be a v-string */
12984     case 'v':
12985 vstring:
12986                 sv = newSV(5); /* preallocate storage space */
12987                 s = scan_vstring(s, PL_bufend, sv);
12988         break;
12989     }
12990
12991     /* make the op for the constant and return */
12992
12993     if (sv)
12994         lvalp->opval = newSVOP(OP_CONST, 0, sv);
12995     else
12996         lvalp->opval = NULL;
12997
12998     return (char *)s;
12999 }
13000
13001 STATIC char *
13002 S_scan_formline(pTHX_ register char *s)
13003 {
13004     dVAR;
13005     register char *eol;
13006     register char *t;
13007     SV * const stuff = newSVpvs("");
13008     bool needargs = FALSE;
13009     bool eofmt = FALSE;
13010 #ifdef PERL_MAD
13011     char *tokenstart = s;
13012     SV* savewhite = NULL;
13013
13014     if (PL_madskills) {
13015         savewhite = PL_thiswhite;
13016         PL_thiswhite = 0;
13017     }
13018 #endif
13019
13020     PERL_ARGS_ASSERT_SCAN_FORMLINE;
13021
13022     while (!needargs) {
13023         if (*s == '.') {
13024             t = s+1;
13025 #ifdef PERL_STRICT_CR
13026             while (SPACE_OR_TAB(*t))
13027                 t++;
13028 #else
13029             while (SPACE_OR_TAB(*t) || *t == '\r')
13030                 t++;
13031 #endif
13032             if (*t == '\n' || t == PL_bufend) {
13033                 eofmt = TRUE;
13034                 break;
13035             }
13036         }
13037         if (PL_in_eval && !PL_rsfp) {
13038             eol = (char *) memchr(s,'\n',PL_bufend-s);
13039             if (!eol++)
13040                 eol = PL_bufend;
13041         }
13042         else
13043             eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
13044         if (*s != '#') {
13045             for (t = s; t < eol; t++) {
13046                 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
13047                     needargs = FALSE;
13048                     goto enough;        /* ~~ must be first line in formline */
13049                 }
13050                 if (*t == '@' || *t == '^')
13051                     needargs = TRUE;
13052             }
13053             if (eol > s) {
13054                 sv_catpvn(stuff, s, eol-s);
13055 #ifndef PERL_STRICT_CR
13056                 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
13057                     char *end = SvPVX(stuff) + SvCUR(stuff);
13058                     end[-2] = '\n';
13059                     end[-1] = '\0';
13060                     SvCUR_set(stuff, SvCUR(stuff) - 1);
13061                 }
13062 #endif
13063             }
13064             else
13065               break;
13066         }
13067         s = (char*)eol;
13068         if (PL_rsfp) {
13069             bool got_some;
13070 #ifdef PERL_MAD
13071             if (PL_madskills) {
13072                 if (PL_thistoken)
13073                     sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
13074                 else
13075                     PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
13076             }
13077 #endif
13078             PL_bufptr = PL_bufend;
13079             CopLINE_inc(PL_curcop);
13080             got_some = lex_next_chunk(0);
13081             CopLINE_dec(PL_curcop);
13082             s = PL_bufptr;
13083 #ifdef PERL_MAD
13084             tokenstart = PL_bufptr;
13085 #endif
13086             if (!got_some)
13087                 break;
13088         }
13089         incline(s);
13090     }
13091   enough:
13092     if (SvCUR(stuff)) {
13093         PL_expect = XTERM;
13094         if (needargs) {
13095             PL_lex_state = LEX_NORMAL;
13096             start_force(PL_curforce);
13097             NEXTVAL_NEXTTOKE.ival = 0;
13098             force_next(',');
13099         }
13100         else
13101             PL_lex_state = LEX_FORMLINE;
13102         if (!IN_BYTES) {
13103             if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
13104                 SvUTF8_on(stuff);
13105             else if (PL_encoding)
13106                 sv_recode_to_utf8(stuff, PL_encoding);
13107         }
13108         start_force(PL_curforce);
13109         NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
13110         force_next(THING);
13111         start_force(PL_curforce);
13112         NEXTVAL_NEXTTOKE.ival = OP_FORMLINE;
13113         force_next(LSTOP);
13114     }
13115     else {
13116         SvREFCNT_dec(stuff);
13117         if (eofmt)
13118             PL_lex_formbrack = 0;
13119         PL_bufptr = s;
13120     }
13121 #ifdef PERL_MAD
13122     if (PL_madskills) {
13123         if (PL_thistoken)
13124             sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
13125         else
13126             PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
13127         PL_thiswhite = savewhite;
13128     }
13129 #endif
13130     return s;
13131 }
13132
13133 I32
13134 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
13135 {
13136     dVAR;
13137     const I32 oldsavestack_ix = PL_savestack_ix;
13138     CV* const outsidecv = PL_compcv;
13139
13140     if (PL_compcv) {
13141         assert(SvTYPE(PL_compcv) == SVt_PVCV);
13142     }
13143     SAVEI32(PL_subline);
13144     save_item(PL_subname);
13145     SAVESPTR(PL_compcv);
13146
13147     PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
13148     CvFLAGS(PL_compcv) |= flags;
13149
13150     PL_subline = CopLINE(PL_curcop);
13151     CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
13152     CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
13153     CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
13154
13155     return oldsavestack_ix;
13156 }
13157
13158 #ifdef __SC__
13159 #pragma segment Perl_yylex
13160 #endif
13161 static int
13162 S_yywarn(pTHX_ const char *const s)
13163 {
13164     dVAR;
13165
13166     PERL_ARGS_ASSERT_YYWARN;
13167
13168     PL_in_eval |= EVAL_WARNONLY;
13169     yyerror(s);
13170     PL_in_eval &= ~EVAL_WARNONLY;
13171     return 0;
13172 }
13173
13174 int
13175 Perl_yyerror(pTHX_ const char *const s)
13176 {
13177     dVAR;
13178     const char *where = NULL;
13179     const char *context = NULL;
13180     int contlen = -1;
13181     SV *msg;
13182     int yychar  = PL_parser->yychar;
13183
13184     PERL_ARGS_ASSERT_YYERROR;
13185
13186     if (!yychar || (yychar == ';' && !PL_rsfp))
13187         where = "at EOF";
13188     else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
13189       PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
13190       PL_oldbufptr != PL_bufptr) {
13191         /*
13192                 Only for NetWare:
13193                 The code below is removed for NetWare because it abends/crashes on NetWare
13194                 when the script has error such as not having the closing quotes like:
13195                     if ($var eq "value)
13196                 Checking of white spaces is anyway done in NetWare code.
13197         */
13198 #ifndef NETWARE
13199         while (isSPACE(*PL_oldoldbufptr))
13200             PL_oldoldbufptr++;
13201 #endif
13202         context = PL_oldoldbufptr;
13203         contlen = PL_bufptr - PL_oldoldbufptr;
13204     }
13205     else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
13206       PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
13207         /*
13208                 Only for NetWare:
13209                 The code below is removed for NetWare because it abends/crashes on NetWare
13210                 when the script has error such as not having the closing quotes like:
13211                     if ($var eq "value)
13212                 Checking of white spaces is anyway done in NetWare code.
13213         */
13214 #ifndef NETWARE
13215         while (isSPACE(*PL_oldbufptr))
13216             PL_oldbufptr++;
13217 #endif
13218         context = PL_oldbufptr;
13219         contlen = PL_bufptr - PL_oldbufptr;
13220     }
13221     else if (yychar > 255)
13222         where = "next token ???";
13223     else if (yychar == -2) { /* YYEMPTY */
13224         if (PL_lex_state == LEX_NORMAL ||
13225            (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
13226             where = "at end of line";
13227         else if (PL_lex_inpat)
13228             where = "within pattern";
13229         else
13230             where = "within string";
13231     }
13232     else {
13233         SV * const where_sv = newSVpvs_flags("next char ", SVs_TEMP);
13234         if (yychar < 32)
13235             Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
13236         else if (isPRINT_LC(yychar)) {
13237             const char string = yychar;
13238             sv_catpvn(where_sv, &string, 1);
13239         }
13240         else
13241             Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
13242         where = SvPVX_const(where_sv);
13243     }
13244     msg = sv_2mortal(newSVpv(s, 0));
13245     Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
13246         OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
13247     if (context)
13248         Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
13249     else
13250         Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
13251     if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
13252         Perl_sv_catpvf(aTHX_ msg,
13253         "  (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
13254                 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
13255         PL_multi_end = 0;
13256     }
13257     if (PL_in_eval & EVAL_WARNONLY) {
13258         Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
13259     }
13260     else
13261         qerror(msg);
13262     if (PL_error_count >= 10) {
13263         if (PL_in_eval && SvCUR(ERRSV))
13264             Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
13265                        SVfARG(ERRSV), OutCopFILE(PL_curcop));
13266         else
13267             Perl_croak(aTHX_ "%s has too many errors.\n",
13268             OutCopFILE(PL_curcop));
13269     }
13270     PL_in_my = 0;
13271     PL_in_my_stash = NULL;
13272     return 0;
13273 }
13274 #ifdef __SC__
13275 #pragma segment Main
13276 #endif
13277
13278 STATIC char*
13279 S_swallow_bom(pTHX_ U8 *s)
13280 {
13281     dVAR;
13282     const STRLEN slen = SvCUR(PL_linestr);
13283
13284     PERL_ARGS_ASSERT_SWALLOW_BOM;
13285
13286     switch (s[0]) {
13287     case 0xFF:
13288         if (s[1] == 0xFE) {
13289             /* UTF-16 little-endian? (or UTF32-LE?) */
13290             if (s[2] == 0 && s[3] == 0)  /* UTF-32 little-endian */
13291                 Perl_croak(aTHX_ "Unsupported script encoding UTF32-LE");
13292 #ifndef PERL_NO_UTF16_FILTER
13293             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF16-LE script encoding (BOM)\n");
13294             s += 2;
13295             if (PL_bufend > (char*)s) {
13296                 s = add_utf16_textfilter(s, TRUE);
13297             }
13298 #else
13299             Perl_croak(aTHX_ "Unsupported script encoding UTF16-LE");
13300 #endif
13301         }
13302         break;
13303     case 0xFE:
13304         if (s[1] == 0xFF) {   /* UTF-16 big-endian? */
13305 #ifndef PERL_NO_UTF16_FILTER
13306             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
13307             s += 2;
13308             if (PL_bufend > (char *)s) {
13309                 s = add_utf16_textfilter(s, FALSE);
13310             }
13311 #else
13312             Perl_croak(aTHX_ "Unsupported script encoding UTF16-BE");
13313 #endif
13314         }
13315         break;
13316     case 0xEF:
13317         if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
13318             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
13319             s += 3;                      /* UTF-8 */
13320         }
13321         break;
13322     case 0:
13323         if (slen > 3) {
13324              if (s[1] == 0) {
13325                   if (s[2] == 0xFE && s[3] == 0xFF) {
13326                        /* UTF-32 big-endian */
13327                        Perl_croak(aTHX_ "Unsupported script encoding UTF32-BE");
13328                   }
13329              }
13330              else if (s[2] == 0 && s[3] != 0) {
13331                   /* Leading bytes
13332                    * 00 xx 00 xx
13333                    * are a good indicator of UTF-16BE. */
13334                   if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
13335                 s = add_utf16_textfilter(s, FALSE);
13336              }
13337         }
13338 #ifdef EBCDIC
13339     case 0xDD:
13340         if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
13341             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
13342             s += 4;                      /* UTF-8 */
13343         }
13344         break;
13345 #endif
13346
13347     default:
13348          if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
13349                   /* Leading bytes
13350                    * xx 00 xx 00
13351                    * are a good indicator of UTF-16LE. */
13352               if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
13353               s = add_utf16_textfilter(s, TRUE);
13354          }
13355     }
13356     return (char*)s;
13357 }
13358
13359
13360 #ifndef PERL_NO_UTF16_FILTER
13361 static I32
13362 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
13363 {
13364     dVAR;
13365     SV *const filter = FILTER_DATA(idx);
13366     /* We re-use this each time round, throwing the contents away before we
13367        return.  */
13368     SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
13369     SV *const utf8_buffer = filter;
13370     IV status = IoPAGE(filter);
13371     const bool reverse = (bool) IoLINES(filter);
13372     I32 retval;
13373
13374     /* As we're automatically added, at the lowest level, and hence only called
13375        from this file, we can be sure that we're not called in block mode. Hence
13376        don't bother writing code to deal with block mode.  */
13377     if (maxlen) {
13378         Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
13379     }
13380     if (status < 0) {
13381         Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
13382     }
13383     DEBUG_P(PerlIO_printf(Perl_debug_log,
13384                           "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
13385                           FPTR2DPTR(void *, S_utf16_textfilter),
13386                           reverse ? 'l' : 'b', idx, maxlen, status,
13387                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
13388
13389     while (1) {
13390         STRLEN chars;
13391         STRLEN have;
13392         I32 newlen;
13393         U8 *end;
13394         /* First, look in our buffer of existing UTF-8 data:  */
13395         char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
13396
13397         if (nl) {
13398             ++nl;
13399         } else if (status == 0) {
13400             /* EOF */
13401             IoPAGE(filter) = 0;
13402             nl = SvEND(utf8_buffer);
13403         }
13404         if (nl) {
13405             STRLEN got = nl - SvPVX(utf8_buffer);
13406             /* Did we have anything to append?  */
13407             retval = got != 0;
13408             sv_catpvn(sv, SvPVX(utf8_buffer), got);
13409             /* Everything else in this code works just fine if SVp_POK isn't
13410                set.  This, however, needs it, and we need it to work, else
13411                we loop infinitely because the buffer is never consumed.  */
13412             sv_chop(utf8_buffer, nl);
13413             break;
13414         }
13415
13416         /* OK, not a complete line there, so need to read some more UTF-16.
13417            Read an extra octect if the buffer currently has an odd number. */
13418         while (1) {
13419             if (status <= 0)
13420                 break;
13421             if (SvCUR(utf16_buffer) >= 2) {
13422                 /* Location of the high octet of the last complete code point.
13423                    Gosh, UTF-16 is a pain. All the benefits of variable length,
13424                    *coupled* with all the benefits of partial reads and
13425                    endianness.  */
13426                 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
13427                     + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
13428
13429                 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
13430                     break;
13431                 }
13432
13433                 /* We have the first half of a surrogate. Read more.  */
13434                 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
13435             }
13436
13437             status = FILTER_READ(idx + 1, utf16_buffer,
13438                                  160 + (SvCUR(utf16_buffer) & 1));
13439             DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
13440             DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
13441             if (status < 0) {
13442                 /* Error */
13443                 IoPAGE(filter) = status;
13444                 return status;
13445             }
13446         }
13447
13448         chars = SvCUR(utf16_buffer) >> 1;
13449         have = SvCUR(utf8_buffer);
13450         SvGROW(utf8_buffer, have + chars * 3 + 1);
13451
13452         if (reverse) {
13453             end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
13454                                          (U8*)SvPVX_const(utf8_buffer) + have,
13455                                          chars * 2, &newlen);
13456         } else {
13457             end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
13458                                 (U8*)SvPVX_const(utf8_buffer) + have,
13459                                 chars * 2, &newlen);
13460         }
13461         SvCUR_set(utf8_buffer, have + newlen);
13462         *end = '\0';
13463
13464         /* No need to keep this SV "well-formed" with a '\0' after the end, as
13465            it's private to us, and utf16_to_utf8{,reversed} take a
13466            (pointer,length) pair, rather than a NUL-terminated string.  */
13467         if(SvCUR(utf16_buffer) & 1) {
13468             *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
13469             SvCUR_set(utf16_buffer, 1);
13470         } else {
13471             SvCUR_set(utf16_buffer, 0);
13472         }
13473     }
13474     DEBUG_P(PerlIO_printf(Perl_debug_log,
13475                           "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
13476                           status,
13477                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
13478     DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
13479     return retval;
13480 }
13481
13482 static U8 *
13483 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
13484 {
13485     SV *filter = filter_add(S_utf16_textfilter, NULL);
13486
13487     IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
13488     sv_setpvs(filter, "");
13489     IoLINES(filter) = reversed;
13490     IoPAGE(filter) = 1; /* Not EOF */
13491
13492     /* Sadly, we have to return a valid pointer, come what may, so we have to
13493        ignore any error return from this.  */
13494     SvCUR_set(PL_linestr, 0);
13495     if (FILTER_READ(0, PL_linestr, 0)) {
13496         SvUTF8_on(PL_linestr);
13497     } else {
13498         SvUTF8_on(PL_linestr);
13499     }
13500     PL_bufend = SvEND(PL_linestr);
13501     return (U8*)SvPVX(PL_linestr);
13502 }
13503 #endif
13504
13505 /*
13506 Returns a pointer to the next character after the parsed
13507 vstring, as well as updating the passed in sv.
13508
13509 Function must be called like
13510
13511         sv = newSV(5);
13512         s = scan_vstring(s,e,sv);
13513
13514 where s and e are the start and end of the string.
13515 The sv should already be large enough to store the vstring
13516 passed in, for performance reasons.
13517
13518 */
13519
13520 char *
13521 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
13522 {
13523     dVAR;
13524     const char *pos = s;
13525     const char *start = s;
13526
13527     PERL_ARGS_ASSERT_SCAN_VSTRING;
13528
13529     if (*pos == 'v') pos++;  /* get past 'v' */
13530     while (pos < e && (isDIGIT(*pos) || *pos == '_'))
13531         pos++;
13532     if ( *pos != '.') {
13533         /* this may not be a v-string if followed by => */
13534         const char *next = pos;
13535         while (next < e && isSPACE(*next))
13536             ++next;
13537         if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
13538             /* return string not v-string */
13539             sv_setpvn(sv,(char *)s,pos-s);
13540             return (char *)pos;
13541         }
13542     }
13543
13544     if (!isALPHA(*pos)) {
13545         U8 tmpbuf[UTF8_MAXBYTES+1];
13546
13547         if (*s == 'v')
13548             s++;  /* get past 'v' */
13549
13550         sv_setpvs(sv, "");
13551
13552         for (;;) {
13553             /* this is atoi() that tolerates underscores */
13554             U8 *tmpend;
13555             UV rev = 0;
13556             const char *end = pos;
13557             UV mult = 1;
13558             while (--end >= s) {
13559                 if (*end != '_') {
13560                     const UV orev = rev;
13561                     rev += (*end - '0') * mult;
13562                     mult *= 10;
13563                     if (orev > rev)
13564                         Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
13565                                          "Integer overflow in decimal number");
13566                 }
13567             }
13568 #ifdef EBCDIC
13569             if (rev > 0x7FFFFFFF)
13570                  Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
13571 #endif
13572             /* Append native character for the rev point */
13573             tmpend = uvchr_to_utf8(tmpbuf, rev);
13574             sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
13575             if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
13576                  SvUTF8_on(sv);
13577             if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
13578                  s = ++pos;
13579             else {
13580                  s = pos;
13581                  break;
13582             }
13583             while (pos < e && (isDIGIT(*pos) || *pos == '_'))
13584                  pos++;
13585         }
13586         SvPOK_on(sv);
13587         sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
13588         SvRMAGICAL_on(sv);
13589     }
13590     return (char *)s;
13591 }
13592
13593 int
13594 Perl_keyword_plugin_standard(pTHX_
13595         char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
13596 {
13597     PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
13598     PERL_UNUSED_CONTEXT;
13599     PERL_UNUSED_ARG(keyword_ptr);
13600     PERL_UNUSED_ARG(keyword_len);
13601     PERL_UNUSED_ARG(op_ptr);
13602     return KEYWORD_PLUGIN_DECLINE;
13603 }
13604
13605 /*
13606  * Local variables:
13607  * c-indentation-style: bsd
13608  * c-basic-offset: 4
13609  * indent-tabs-mode: t
13610  * End:
13611  *
13612  * ex: set ts=8 sts=4 sw=4 noet:
13613  */