toke.c

   1 /*    toke.c
   2  *
   3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   4  *    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
   5  *
   6  *    You may distribute under the terms of either the GNU General Public
   7  *    License or the Artistic License, as specified in the README file.
   8  *
   9  */
  10
  11 /*
  12  *  'It all comes from here, the stench and the peril.'    --Frodo
  13  *
  14  *     [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
  15  */
  16
  17 /*
  18  * This file is the lexer for Perl.  It's closely linked to the
  19  * parser, perly.y.
  20  *
  21  * The main routine is yylex(), which returns the next token.
  22  */
  23
  24 /*
  25 =head1 Lexer interface
  26
  27 This is the lower layer of the Perl parser, managing characters and tokens.
  28
  29 =for apidoc AmU|yy_parser *|PL_parser
  30
  31 Pointer to a structure encapsulating the state of the parsing operation
  32 currently in progress.  The pointer can be locally changed to perform
  33 a nested parse without interfering with the state of an outer parse.
  34 Individual members of C<PL_parser> have their own documentation.
  35
  36 =cut
  37 */
  38
  39 #include "EXTERN.h"
  40 #define PERL_IN_TOKE_C
  41 #include "perl.h"
  42
  43 #define new_constant(a,b,c,d,e,f,g)     \
  44         S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
  45
  46 #define pl_yylval       (PL_parser->yylval)
  47
  48 /* YYINITDEPTH -- initial size of the parser's stacks.  */
  49 #define YYINITDEPTH 200
  50
  51 /* XXX temporary backwards compatibility */
  52 #define PL_lex_brackets         (PL_parser->lex_brackets)
  53 #define PL_lex_brackstack       (PL_parser->lex_brackstack)
  54 #define PL_lex_casemods         (PL_parser->lex_casemods)
  55 #define PL_lex_casestack        (PL_parser->lex_casestack)
  56 #define PL_lex_defer            (PL_parser->lex_defer)
  57 #define PL_lex_dojoin           (PL_parser->lex_dojoin)
  58 #define PL_lex_expect           (PL_parser->lex_expect)
  59 #define PL_lex_formbrack        (PL_parser->lex_formbrack)
  60 #define PL_lex_inpat            (PL_parser->lex_inpat)
  61 #define PL_lex_inwhat           (PL_parser->lex_inwhat)
  62 #define PL_lex_op               (PL_parser->lex_op)
  63 #define PL_lex_repl             (PL_parser->lex_repl)
  64 #define PL_lex_starts           (PL_parser->lex_starts)
  65 #define PL_lex_stuff            (PL_parser->lex_stuff)
  66 #define PL_multi_start          (PL_parser->multi_start)
  67 #define PL_multi_open           (PL_parser->multi_open)
  68 #define PL_multi_close          (PL_parser->multi_close)
  69 #define PL_pending_ident        (PL_parser->pending_ident)
  70 #define PL_preambled            (PL_parser->preambled)
  71 #define PL_sublex_info          (PL_parser->sublex_info)
  72 #define PL_linestr              (PL_parser->linestr)
  73 #define PL_expect               (PL_parser->expect)
  74 #define PL_copline              (PL_parser->copline)
  75 #define PL_bufptr               (PL_parser->bufptr)
  76 #define PL_oldbufptr            (PL_parser->oldbufptr)
  77 #define PL_oldoldbufptr         (PL_parser->oldoldbufptr)
  78 #define PL_linestart            (PL_parser->linestart)
  79 #define PL_bufend               (PL_parser->bufend)
  80 #define PL_last_uni             (PL_parser->last_uni)
  81 #define PL_last_lop             (PL_parser->last_lop)
  82 #define PL_last_lop_op          (PL_parser->last_lop_op)
  83 #define PL_lex_state            (PL_parser->lex_state)
  84 #define PL_rsfp                 (PL_parser->rsfp)
  85 #define PL_rsfp_filters         (PL_parser->rsfp_filters)
  86 #define PL_in_my                (PL_parser->in_my)
  87 #define PL_in_my_stash          (PL_parser->in_my_stash)
  88 #define PL_tokenbuf             (PL_parser->tokenbuf)
  89 #define PL_multi_end            (PL_parser->multi_end)
  90 #define PL_error_count          (PL_parser->error_count)
  91
  92 #ifdef PERL_MAD
  93 #  define PL_endwhite           (PL_parser->endwhite)
  94 #  define PL_faketokens         (PL_parser->faketokens)
  95 #  define PL_lasttoke           (PL_parser->lasttoke)
  96 #  define PL_nextwhite          (PL_parser->nextwhite)
  97 #  define PL_realtokenstart     (PL_parser->realtokenstart)
  98 #  define PL_skipwhite          (PL_parser->skipwhite)
  99 #  define PL_thisclose          (PL_parser->thisclose)
 100 #  define PL_thismad            (PL_parser->thismad)
 101 #  define PL_thisopen           (PL_parser->thisopen)
 102 #  define PL_thisstuff          (PL_parser->thisstuff)
 103 #  define PL_thistoken          (PL_parser->thistoken)
 104 #  define PL_thiswhite          (PL_parser->thiswhite)
 105 #  define PL_thiswhite          (PL_parser->thiswhite)
 106 #  define PL_nexttoke           (PL_parser->nexttoke)
 107 #  define PL_curforce           (PL_parser->curforce)
 108 #else
 109 #  define PL_nexttoke           (PL_parser->nexttoke)
 110 #  define PL_nexttype           (PL_parser->nexttype)
 111 #  define PL_nextval            (PL_parser->nextval)
 112 #endif
 113
 114 /* This can't be done with embed.fnc, because struct yy_parser contains a
 115    member named pending_ident, which clashes with the generated #define  */
 116 static int
 117 S_pending_ident(pTHX);
 118
 119 static const char ident_too_long[] = "Identifier too long";
 120
 121 #ifdef PERL_MAD
 122 #  define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
 123 #  define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
 124 #else
 125 #  define CURMAD(slot,sv)
 126 #  define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
 127 #endif
 128
 129 #define XFAKEBRACK 128
 130 #define XENUMMASK 127
 131
 132 #ifdef USE_UTF8_SCRIPTS
 133 #   define UTF (!IN_BYTES)
 134 #else
 135 #   define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
 136 #endif
 137
 138 /* The maximum number of characters preceding the unrecognized one to display */
 139 #define UNRECOGNIZED_PRECEDE_COUNT 10
 140
 141 /* In variables named $^X, these are the legal values for X.
 142  * 1999-02-27 mjd-perl-patch@plover.com */
 143 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
 144
 145 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
 146
 147 /* LEX_* are values for PL_lex_state, the state of the lexer.
 148  * They are arranged oddly so that the guard on the switch statement
 149  * can get by with a single comparison (if the compiler is smart enough).
 150  */
 151
 152 /* #define LEX_NOTPARSING               11 is done in perl.h. */
 153
 154 #define LEX_NORMAL              10 /* normal code (ie not within "...")     */
 155 #define LEX_INTERPNORMAL         9 /* code within a string, eg "$foo[$x+1]" */
 156 #define LEX_INTERPCASEMOD        8 /* expecting a \U, \Q or \E etc          */
 157 #define LEX_INTERPPUSH           7 /* starting a new sublex parse level     */
 158 #define LEX_INTERPSTART          6 /* expecting the start of a $var         */
 159
 160                                    /* at end of code, eg "$x" followed by:  */
 161 #define LEX_INTERPEND            5 /* ... eg not one of [, { or ->          */
 162 #define LEX_INTERPENDMAYBE       4 /* ... eg one of [, { or ->              */
 163
 164 #define LEX_INTERPCONCAT         3 /* expecting anything, eg at start of
 165                                         string or after \E, $foo, etc       */
 166 #define LEX_INTERPCONST          2 /* NOT USED */
 167 #define LEX_FORMLINE             1 /* expecting a format line               */
 168 #define LEX_KNOWNEXT             0 /* next token known; just return it      */
 169
 170
  171 #ifdef DEBUGGING
/* Printable names of the lexer states, in the same order as the LEX_*
 * values defined above: entry 0 is LEX_KNOWNEXT and entry 10 is LEX_NORMAL.
 * The table must be kept in step with those definitions.  It has no entry
 * for LEX_NOTPARSING (11). */
  172 static const char* const lex_state_names[] = {
  173     "KNOWNEXT",
  174     "FORMLINE",
  175     "INTERPCONST",
  176     "INTERPCONCAT",
  177     "INTERPENDMAYBE",
  178     "INTERPEND",
  179     "INTERPSTART",
  180     "INTERPPUSH",
  181     "INTERPCASEMOD",
  182     "INTERPNORMAL",
  183     "NORMAL"
  184 };
  185 #endif
 186
 187 #ifdef ff_next
 188 #undef ff_next
 189 #endif
 190
 191 #include "keywords.h"
 192
 193 /* CLINE is a macro that ensures PL_copline has a sane value */
 194
 195 #ifdef CLINE
 196 #undef CLINE
 197 #endif
 198 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
 199
 200 #ifdef PERL_MAD
 201 #  define SKIPSPACE0(s) skipspace0(s)
 202 #  define SKIPSPACE1(s) skipspace1(s)
 203 #  define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
 204 #  define PEEKSPACE(s) skipspace2(s,0)
 205 #else
 206 #  define SKIPSPACE0(s) skipspace(s)
 207 #  define SKIPSPACE1(s) skipspace(s)
 208 #  define SKIPSPACE2(s,tsv) skipspace(s)
 209 #  define PEEKSPACE(s) skipspace(s)
 210 #endif
 211
 212 /*
 213  * Convenience functions to return different tokens and prime the
 214  * lexer for the next token.  They all take an argument.
 215  *
 216  * TOKEN        : generic token (used for '(', DOLSHARP, etc)
 217  * OPERATOR     : generic operator
 218  * AOPERATOR    : assignment operator
 219  * PREBLOCK     : beginning the block after an if, while, foreach, ...
 220  * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
 221  * PREREF       : *EXPR where EXPR is not a simple identifier
 222  * TERM         : expression term
 223  * LOOPX        : loop exiting command (goto, last, dump, etc)
 224  * FTST         : file test operator
 225  * FUN0         : zero-argument function
 226  * FUN1         : not used, except for not, which isn't a UNIOP
 227  * BOop         : bitwise or or xor
 228  * BAop         : bitwise and
 229  * SHop         : shift operator
 230  * PWop         : power operator
 231  * PMop         : pattern-matching operator
 232  * Aop          : addition-level operator
 233  * Mop          : multiplication-level operator
 234  * Eop          : equality-testing operator
 235  * Rop          : relational operator <= != gt
 236  *
 237  * Also see LOP and lop() below.
 238  */
 239
  240 #ifdef DEBUGGING /* Serve -DT. */
  241 #   define REPORT(retval) tokereport((I32)retval, &pl_yylval)
  242 #else
  243 #   define REPORT(retval) (retval)
  244 #endif
  245
/* Every macro below expands to a complete `return` statement, so using one
 * leaves the enclosing function.  Each stores the local variable `s` into
 * PL_bufptr and passes the token through REPORT().  The macros that take an
 * opcode argument `f` also store it in pl_yylval.ival.  AOPERATOR, BOop,
 * BAop, SHop, PWop, Aop and Mop additionally route the token through ao(). */
  246 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
  247 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
  248 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
  249 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
  250 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
  251 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
  252 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
  253 #define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
  254 #define FTST(f)  return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
  255 #define FUN0(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
  256 #define FUN1(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
  257 #define BOop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
  258 #define BAop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
  259 #define SHop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
  260 #define PWop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
  261 #define PMop(f)  return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
  262 #define Aop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
  263 #define Mop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
  264 #define Eop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
  265 #define Rop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
 266
  267 /* This bit of chicanery makes a unary function followed by
  268  * a parenthesis into a function with one argument, highest precedence.
  269  * The UNIDOR macro is for unary functions that can be followed by the //
  270  * operator (such as C<shift // 0>).
       *
       * UNI2 and UNIBRACK expand to a brace-enclosed block (not a
       * do { } while (0)) that always returns from the enclosing function:
       * FUNC1 if the next character, or the next one after PEEKSPACE(), is
       * '(' and UNIOP otherwise.  They read and update the local variable s.
       * UNIBRACK is UNI2 without the assignments to PL_expect and
       * PL_last_lop_op.
  271  */
  272 #define UNI2(f,x) { \
  273         pl_yylval.ival = f; \
  274         PL_expect = x; \
  275         PL_bufptr = s; \
  276         PL_last_uni = PL_oldbufptr; \
  277         PL_last_lop_op = f; \
  278         if (*s == '(') \
  279             return REPORT( (int)FUNC1 ); \
  280         s = PEEKSPACE(s); \
  281         return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
  282         }
  283 #define UNI(f)    UNI2(f,XTERM)
  284 #define UNIDOR(f) UNI2(f,XTERMORDORDOR)
  285
  286 #define UNIBRACK(f) { \
  287         pl_yylval.ival = f; \
  288         PL_bufptr = s; \
  289         PL_last_uni = PL_oldbufptr; \
  290         if (*s == '(') \
  291             return REPORT( (int)FUNC1 ); \
  292         s = PEEKSPACE(s); \
  293         return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
  294         }
  295
  296 /* grandfather return to old style */
      /* Unlike the macros above, OLDLOP returns LSTOP directly without
       * passing it through REPORT(). */
  297 #define OLDLOP(f) return(pl_yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
 298
 299 #ifdef DEBUGGING
 300
  301 /* how to interpret the pl_yylval associated with the token */
  302 enum token_type {
  303     TOKENTYPE_NONE,  /* nothing extra is printed for the token */
  304     TOKENTYPE_IVAL,  /* pl_yylval.ival is printed as an integer */
  305     TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
  306     TOKENTYPE_PVAL,  /* pl_yylval.pval is printed as a string */
  307     TOKENTYPE_OPVAL, /* pl_yylval.opval is an op (may be NULL); its type name is printed */
  308     TOKENTYPE_GVVAL  /* handled like TOKENTYPE_NONE by tokereport() */
  309 };
 310
  311 static struct debug_tokens {
  312     const int token;            /* token number, compared with the value being reported */
  313     enum token_type type;       /* which member of pl_yylval to print, if any */
  314     const char *name;           /* printable name of the token */
  315 } const debug_tokens[] =
  316 {
  317     { ADDOP,            TOKENTYPE_OPNUM,        "ADDOP" },
  318     { ANDAND,           TOKENTYPE_NONE,         "ANDAND" },
  319     { ANDOP,            TOKENTYPE_NONE,         "ANDOP" },
  320     { ANONSUB,          TOKENTYPE_IVAL,         "ANONSUB" },
  321     { ARROW,            TOKENTYPE_NONE,         "ARROW" },
  322     { ASSIGNOP,         TOKENTYPE_OPNUM,        "ASSIGNOP" },
  323     { BITANDOP,         TOKENTYPE_OPNUM,        "BITANDOP" },
  324     { BITOROP,          TOKENTYPE_OPNUM,        "BITOROP" },
  325     { COLONATTR,        TOKENTYPE_NONE,         "COLONATTR" },
  326     { CONTINUE,         TOKENTYPE_NONE,         "CONTINUE" },
  327     { DEFAULT,          TOKENTYPE_NONE,         "DEFAULT" },
  328     { DO,               TOKENTYPE_NONE,         "DO" },
  329     { DOLSHARP,         TOKENTYPE_NONE,         "DOLSHARP" },
  330     { DORDOR,           TOKENTYPE_NONE,         "DORDOR" },
  331     { DOROP,            TOKENTYPE_OPNUM,        "DOROP" },
  332     { DOTDOT,           TOKENTYPE_IVAL,         "DOTDOT" },
  333     { ELSE,             TOKENTYPE_NONE,         "ELSE" },
  334     { ELSIF,            TOKENTYPE_IVAL,         "ELSIF" },
  335     { EQOP,             TOKENTYPE_OPNUM,        "EQOP" },
  336     { FOR,              TOKENTYPE_IVAL,         "FOR" },
  337     { FORMAT,           TOKENTYPE_NONE,         "FORMAT" },
  338     { FUNC,             TOKENTYPE_OPNUM,        "FUNC" },
  339     { FUNC0,            TOKENTYPE_OPNUM,        "FUNC0" },
  340     { FUNC0SUB,         TOKENTYPE_OPVAL,        "FUNC0SUB" },
  341     { FUNC1,            TOKENTYPE_OPNUM,        "FUNC1" },
  342     { FUNCMETH,         TOKENTYPE_OPVAL,        "FUNCMETH" },
  343     { GIVEN,            TOKENTYPE_IVAL,         "GIVEN" },
  344     { HASHBRACK,        TOKENTYPE_NONE,         "HASHBRACK" },
  345     { IF,               TOKENTYPE_IVAL,         "IF" },
  346     { LABEL,            TOKENTYPE_PVAL,         "LABEL" },
  347     { LOCAL,            TOKENTYPE_IVAL,         "LOCAL" },
  348     { LOOPEX,           TOKENTYPE_OPNUM,        "LOOPEX" },
  349     { LSTOP,            TOKENTYPE_OPNUM,        "LSTOP" },
  350     { LSTOPSUB,         TOKENTYPE_OPVAL,        "LSTOPSUB" },
  351     { MATCHOP,          TOKENTYPE_OPNUM,        "MATCHOP" },
  352     { METHOD,           TOKENTYPE_OPVAL,        "METHOD" },
  353     { MULOP,            TOKENTYPE_OPNUM,        "MULOP" },
  354     { MY,               TOKENTYPE_IVAL,         "MY" },
  355     { MYSUB,            TOKENTYPE_NONE,         "MYSUB" },
  356     { NOAMP,            TOKENTYPE_NONE,         "NOAMP" },
  357     { NOTOP,            TOKENTYPE_NONE,         "NOTOP" },
  358     { OROP,             TOKENTYPE_IVAL,         "OROP" },
  359     { OROR,             TOKENTYPE_NONE,         "OROR" },
  360     { PACKAGE,          TOKENTYPE_NONE,         "PACKAGE" },
  361     { PLUGEXPR,         TOKENTYPE_OPVAL,        "PLUGEXPR" },
  362     { PLUGSTMT,         TOKENTYPE_OPVAL,        "PLUGSTMT" },
  363     { PMFUNC,           TOKENTYPE_OPVAL,        "PMFUNC" },
  364     { POSTDEC,          TOKENTYPE_NONE,         "POSTDEC" },
  365     { POSTINC,          TOKENTYPE_NONE,         "POSTINC" },
  366     { POWOP,            TOKENTYPE_OPNUM,        "POWOP" },
  367     { PREDEC,           TOKENTYPE_NONE,         "PREDEC" },
  368     { PREINC,           TOKENTYPE_NONE,         "PREINC" },
  369     { PRIVATEREF,       TOKENTYPE_OPVAL,        "PRIVATEREF" },
  370     { REFGEN,           TOKENTYPE_NONE,         "REFGEN" },
  371     { RELOP,            TOKENTYPE_OPNUM,        "RELOP" },
  372     { SHIFTOP,          TOKENTYPE_OPNUM,        "SHIFTOP" },
  373     { SUB,              TOKENTYPE_NONE,         "SUB" },
  374     { THING,            TOKENTYPE_OPVAL,        "THING" },
  375     { UMINUS,           TOKENTYPE_NONE,         "UMINUS" },
  376     { UNIOP,            TOKENTYPE_OPNUM,        "UNIOP" },
  377     { UNIOPSUB,         TOKENTYPE_OPVAL,        "UNIOPSUB" },
  378     { UNLESS,           TOKENTYPE_IVAL,         "UNLESS" },
  379     { UNTIL,            TOKENTYPE_IVAL,         "UNTIL" },
  380     { USE,              TOKENTYPE_IVAL,         "USE" },
  381     { WHEN,             TOKENTYPE_IVAL,         "WHEN" },
  382     { WHILE,            TOKENTYPE_IVAL,         "WHILE" },
  383     { WORD,             TOKENTYPE_OPVAL,        "WORD" },
  384     { YADAYADA,         TOKENTYPE_IVAL,         "YADAYADA" },
  385     { 0,                TOKENTYPE_NONE,         NULL } /* terminator: tokereport()'s lookup stops at token 0 */
  386 };
 387
 388 /* dump the returned token in rv, plus any optional arg in pl_yylval */
 389
 390 STATIC int
 391 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
 392 {
 393     dVAR;
 394
 395     PERL_ARGS_ASSERT_TOKEREPORT;
 396
 397     if (DEBUG_T_TEST) {
 398         const char *name = NULL;
 399         enum token_type type = TOKENTYPE_NONE;
 400         const struct debug_tokens *p;
 401         SV* const report = newSVpvs("<== ");
 402
 403         for (p = debug_tokens; p->token; p++) {
 404             if (p->token == (int)rv) {
 405                 name = p->name;
 406                 type = p->type;
 407                 break;
 408             }
 409         }
 410         if (name)
 411             Perl_sv_catpv(aTHX_ report, name);
 412         else if ((char)rv > ' ' && (char)rv < '~')
 413             Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
 414         else if (!rv)
 415             sv_catpvs(report, "EOF");
 416         else
 417             Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
 418         switch (type) {
 419         case TOKENTYPE_NONE:
 420         case TOKENTYPE_GVVAL: /* doesn't appear to be used */
 421             break;
 422         case TOKENTYPE_IVAL:
 423             Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
 424             break;
 425         case TOKENTYPE_OPNUM:
 426             Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
 427                                     PL_op_name[lvalp->ival]);
 428             break;
 429         case TOKENTYPE_PVAL:
 430             Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
 431             break;
 432         case TOKENTYPE_OPVAL:
 433             if (lvalp->opval) {
 434                 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
 435                                     PL_op_name[lvalp->opval->op_type]);
 436                 if (lvalp->opval->op_type == OP_CONST) {
 437                     Perl_sv_catpvf(aTHX_ report, " %s",
 438                         SvPEEK(cSVOPx_sv(lvalp->opval)));
 439                 }
 440
 441             }
 442             else
 443                 sv_catpvs(report, "(opval=null)");
 444             break;
 445         }
 446         PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
 447     };
 448     return (int)rv;
 449 }
 450
 451
 452 /* print the buffer with suitable escapes */
 453
 454 STATIC void
 455 S_printbuf(pTHX_ const char *const fmt, const char *const s)
 456 {
 457     SV* const tmp = newSVpvs("");
 458
 459     PERL_ARGS_ASSERT_PRINTBUF;
 460
 461     PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
 462     SvREFCNT_dec(tmp);
 463 }
 464
 465 #endif
 466
 467 static int
 468 S_deprecate_commaless_var_list(pTHX) {
 469     PL_expect = XTERM;
 470     deprecate("comma-less variable list");
 471     return REPORT(','); /* grandfather non-comma-format format */
 472 }
 473
 474 /*
 475  * S_ao
 476  *
 477  * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
 478  * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
 479  */
 480
 481 STATIC int
 482 S_ao(pTHX_ int toketype)
 483 {
 484     dVAR;
 485     if (*PL_bufptr == '=') {
 486         PL_bufptr++;
 487         if (toketype == ANDAND)
 488             pl_yylval.ival = OP_ANDASSIGN;
 489         else if (toketype == OROR)
 490             pl_yylval.ival = OP_ORASSIGN;
 491         else if (toketype == DORDOR)
 492             pl_yylval.ival = OP_DORASSIGN;
 493         toketype = ASSIGNOP;
 494     }
 495     return toketype;
 496 }
 497
 498 /*
 499  * S_no_op
 500  * When Perl expects an operator and finds something else, no_op
 501  * prints the warning.  It always prints "<something> found where
 502  * operator expected.  It prints "Missing semicolon on previous line?"
 503  * if the surprise occurs at the start of the line.  "do you need to
 504  * predeclare ..." is printed out for code like "sub bar; foo bar $x"
 505  * where the compiler doesn't know if foo is a method call or a function.
 506  * It prints "Missing operator before end of line" if there's nothing
 507  * after the missing operator, or "... before <...>" if there is something
 508  * after the missing operator.
 509  */
 510
 511 STATIC void
 512 S_no_op(pTHX_ const char *const what, char *s)
 513 {
 514     dVAR;
 515     char * const oldbp = PL_bufptr;
 516     const bool is_first = (PL_oldbufptr == PL_linestart);
 517
 518     PERL_ARGS_ASSERT_NO_OP;
 519
 520     if (!s)
 521         s = oldbp;
 522     else
 523         PL_bufptr = s;
 524     yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
 525     if (ckWARN_d(WARN_SYNTAX)) {
 526         if (is_first)
 527             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 528                     "\t(Missing semicolon on previous line?)\n");
 529         else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
 530             const char *t;
 531             for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
 532                 NOOP;
 533             if (t < PL_bufptr && isSPACE(*t))
 534                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 535                         "\t(Do you need to predeclare %.*s?)\n",
 536                     (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
 537         }
 538         else {
 539             assert(s >= oldbp);
 540             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 541                     "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
 542         }
 543     }
 544     PL_bufptr = oldbp;
 545 }
 546
 547 /*
 548  * S_missingterm
 549  * Complain about missing quote/regexp/heredoc terminator.
 550  * If it's called with NULL then it cauterizes the line buffer.
 551  * If we're in a delimited string and the delimiter is a control
 552  * character, it's reformatted into a two-char sequence like ^C.
 553  * This is fatal.
 554  */
 555
 556 STATIC void
 557 S_missingterm(pTHX_ char *s)
 558 {
 559     dVAR;
 560     char tmpbuf[3];
 561     char q;
 562     if (s) {
 563         char * const nl = strrchr(s,'\n');
 564         if (nl)
 565             *nl = '\0';
 566     }
 567     else if (isCNTRL(PL_multi_close)) {
 568         *tmpbuf = '^';
 569         tmpbuf[1] = (char)toCTRL(PL_multi_close);
 570         tmpbuf[2] = '\0';
 571         s = tmpbuf;
 572     }
 573     else {
 574         *tmpbuf = (char)PL_multi_close;
 575         tmpbuf[1] = '\0';
 576         s = tmpbuf;
 577     }
 578     q = strchr(s,'"') ? '\'' : '"';
 579     Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
 580 }
 581
 582 #define FEATURE_IS_ENABLED(name)                                        \
 583         ((0 != (PL_hints & HINT_LOCALIZE_HH))                           \
 584             && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
 585 /* The longest string we pass in.  */
 586 #define MAX_FEATURE_LEN (sizeof("unicode_strings")-1)
 587
 588 /*
 589  * S_feature_is_enabled
 590  * Check whether the named feature is enabled.
 591  */
 592 STATIC bool
 593 S_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
 594 {
 595     dVAR;
 596     HV * const hinthv = GvHV(PL_hintgv);
 597     char he_name[8 + MAX_FEATURE_LEN] = "feature_";
 598
 599     PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
 600
 601     assert(namelen <= MAX_FEATURE_LEN);
 602     memcpy(&he_name[8], name, namelen);
 603
 604     return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
 605 }
 606
 607 /*
 608  * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
 609  * utf16-to-utf8-reversed.
 610  */
 611
 612 #ifdef PERL_CR_FILTER
 613 static void
 614 strip_return(SV *sv)
 615 {
 616     register const char *s = SvPVX_const(sv);
 617     register const char * const e = s + SvCUR(sv);
 618
 619     PERL_ARGS_ASSERT_STRIP_RETURN;
 620
 621     /* outer loop optimized to do nothing if there are no CR-LFs */
 622     while (s < e) {
 623         if (*s++ == '\r' && *s == '\n') {
 624             /* hit a CR-LF, need to copy the rest */
 625             register char *d = s - 1;
 626             *d++ = *s++;
 627             while (s < e) {
 628                 if (*s == '\r' && s[1] == '\n')
 629                     s++;
 630                 *d++ = *s++;
 631             }
 632             SvCUR(sv) -= s - d;
 633             return;
 634         }
 635     }
 636 }
 637
 638 STATIC I32
 639 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
 640 {
 641     const I32 count = FILTER_READ(idx+1, sv, maxlen);
 642     if (count > 0 && !maxlen)
 643         strip_return(sv);
 644     return count;
 645 }
 646 #endif
 647
 648
 649
 650 /*
 651  * Perl_lex_start
 652  *
 653  * Create a parser object and initialise its parser and lexer fields
 654  *
 655  * rsfp       is the opened file handle to read from (if any),
 656  *
 657  * line       holds any initial content already read from the file (or in
 658  *            the case of no file, such as an eval, the whole contents);
 659  *
 660  * new_filter indicates that this is a new file and it shouldn't inherit
 661  *            the filters from the current parser (ie require).
 662  */
 663
 664 void
 665 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, bool new_filter)
 666 {
 667     dVAR;
 668     const char *s = NULL;
 669     STRLEN len;
 670     yy_parser *parser, *oparser;
 671
 672     /* create and initialise a parser */
 673
 674     Newxz(parser, 1, yy_parser);
 675     parser->old_parser = oparser = PL_parser;
 676     PL_parser = parser;
 677
 678     Newx(parser->stack, YYINITDEPTH, yy_stack_frame);
 679     parser->ps = parser->stack;
 680     parser->stack_size = YYINITDEPTH;
 681
 682     parser->stack->state = 0;
 683     parser->yyerrstatus = 0;
 684     parser->yychar = YYEMPTY;           /* Cause a token to be read.  */
 685
 686     /* on scope exit, free this parser and restore any outer one */
 687     SAVEPARSER(parser);
 688     parser->saved_curcop = PL_curcop;
 689
 690     /* initialise lexer state */
 691
 692 #ifdef PERL_MAD
 693     parser->curforce = -1;
 694 #else
 695     parser->nexttoke = 0;
 696 #endif
 697     parser->error_count = oparser ? oparser->error_count : 0;
 698     parser->copline = NOLINE;
 699     parser->lex_state = LEX_NORMAL;
 700     parser->expect = XSTATE;
 701     parser->rsfp = rsfp;
 702     parser->rsfp_filters = (new_filter || !oparser) ? newAV()
 703                 : MUTABLE_AV(SvREFCNT_inc(oparser->rsfp_filters));
 704
 705     Newx(parser->lex_brackstack, 120, char);
 706     Newx(parser->lex_casestack, 12, char);
 707     *parser->lex_casestack = '\0';
 708
 709     if (line) {
 710         s = SvPV_const(line, len);
 711     } else {
 712         len = 0;
 713     }
 714
 715     if (!len) {
 716         parser->linestr = newSVpvs("\n;");
 717     } else if (SvREADONLY(line) || s[len-1] != ';') {
 718         parser->linestr = newSVsv(line);
 719         if (s[len-1] != ';')
 720             sv_catpvs(parser->linestr, "\n;");
 721     } else {
 722         SvTEMP_off(line);
 723         SvREFCNT_inc_simple_void_NN(line);
 724         parser->linestr = line;
 725     }
 726     parser->oldoldbufptr =
 727         parser->oldbufptr =
 728         parser->bufptr =
 729         parser->linestart = SvPVX(parser->linestr);
 730     parser->bufend = parser->bufptr + SvCUR(parser->linestr);
 731     parser->last_lop = parser->last_uni = NULL;
 732 }
 733
 734
 735 /* delete a parser object */
 736
 737 void
 738 Perl_parser_free(pTHX_  const yy_parser *parser)
 739 {
 740     PERL_ARGS_ASSERT_PARSER_FREE;
 741
 742     PL_curcop = parser->saved_curcop;
 743     SvREFCNT_dec(parser->linestr);
 744
 745     if (parser->rsfp == PerlIO_stdin())
 746         PerlIO_clearerr(parser->rsfp);
 747     else if (parser->rsfp && (!parser->old_parser ||
 748                 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
 749         PerlIO_close(parser->rsfp);
 750     SvREFCNT_dec(parser->rsfp_filters);
 751
 752     Safefree(parser->stack);
 753     Safefree(parser->lex_brackstack);
 754     Safefree(parser->lex_casestack);
 755     PL_parser = parser->old_parser;
 756     Safefree(parser);
 757 }
 758
 759
 760 /*
 761  * Perl_lex_end
 762  * Finalizer for lexing operations.  Must be called when the parser is
 763  * done with the lexer.
 764  */
 765
  766 void
  767 Perl_lex_end(pTHX)
  768 {
  769     dVAR;
  770     PL_doextract = FALSE;   /* the only action taken: clear PL_doextract */
  771 }
 772
 773 /*
 774 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
 775
 776 Buffer scalar containing the chunk currently under consideration of the
 777 text currently being lexed.  This is always a plain string scalar (for
 778 which C<SvPOK> is true).  It is not intended to be used as a scalar by
 779 normal scalar means; instead refer to the buffer directly by the pointer
 780 variables described below.
 781
 782 The lexer maintains various C<char*> pointers to things in the
 783 C<PL_parser-E<gt>linestr> buffer.  If C<PL_parser-E<gt>linestr> is ever
 784 reallocated, all of these pointers must be updated.  Don't attempt to
 785 do this manually, but rather use L</lex_grow_linestr> if you need to
 786 reallocate the buffer.
 787
 788 The content of the text chunk in the buffer is commonly exactly one
 789 complete line of input, up to and including a newline terminator,
 790 but there are situations where it is otherwise.  The octets of the
 791 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
 792 The function L</lex_bufutf8> tells you which.  Do not use the C<SvUTF8>
 793 flag on this scalar, which may disagree with it.
 794
 795 For direct examination of the buffer, the variable
 796 L</PL_parser-E<gt>bufend> points to the end of the buffer.  The current
 797 lexing position is pointed to by L</PL_parser-E<gt>bufptr>.  Direct use
 798 of these pointers is usually preferable to examination of the scalar
 799 through normal scalar means.
 800
 801 =for apidoc AmxU|char *|PL_parser-E<gt>bufend
 802
 803 Direct pointer to the end of the chunk of text currently being lexed, the
 804 end of the lexer buffer.  This is equal to C<SvPVX(PL_parser-E<gt>linestr)
 805 + SvCUR(PL_parser-E<gt>linestr)>.  A NUL character (zero octet) is
 806 always located at the end of the buffer, and does not count as part of
 807 the buffer's contents.
 808
 809 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr
 810
 811 Points to the current position of lexing inside the lexer buffer.
 812 Characters around this point may be freely examined, within
 813 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
 814 L</PL_parser-E<gt>bufend>.  The octets of the buffer may be intended to be
 815 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
 816
 817 Lexing code (whether in the Perl core or not) moves this pointer past
 818 the characters that it consumes.  It is also expected to perform some
 819 bookkeeping whenever a newline character is consumed.  This movement
 820 can be more conveniently performed by the function L</lex_read_to>,
 821 which handles newlines appropriately.
 822
 823 Interpretation of the buffer's octets can be abstracted out by
 824 using the slightly higher-level functions L</lex_peek_unichar> and
 825 L</lex_read_unichar>.
 826
 827 =for apidoc AmxU|char *|PL_parser-E<gt>linestart
 828
 829 Points to the start of the current line inside the lexer buffer.
 830 This is useful for indicating at which column an error occurred, and
 831 not much else.  This must be updated by any lexing code that consumes
 832 a newline; the function L</lex_read_to> handles this detail.
 833
 834 =cut
 835 */
 836
 837 /*
 838 =for apidoc Amx|bool|lex_bufutf8
 839
 840 Indicates whether the octets in the lexer buffer
 841 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
 842 of Unicode characters.  If not, they should be interpreted as Latin-1
 843 characters.  This is analogous to the C<SvUTF8> flag for scalars.
 844
 845 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
 846 contains valid UTF-8.  Lexing code must be robust in the face of invalid
 847 encoding.
 848
 849 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
 850 is significant, but not the whole story regarding the input character
 851 encoding.  Normally, when a file is being read, the scalar contains octets
 852 and its C<SvUTF8> flag is off, but the octets should be interpreted as
 853 UTF-8 if the C<use utf8> pragma is in effect.  During a string eval,
 854 however, the scalar may have the C<SvUTF8> flag on, and in this case its
 855 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
 856 is in effect.  This logic may change in the future; use this function
 857 instead of implementing the logic yourself.
 858
 859 =cut
 860 */
 861
 862 bool
 863 Perl_lex_bufutf8(pTHX)
 864 {
 865     return UTF;
 866 }
 867
 868 /*
 869 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
 870
 871 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
 872 at least I<len> octets (including terminating NUL).  Returns a
 873 pointer to the reallocated buffer.  This is necessary before making
 874 any direct modification of the buffer that would increase its length.
 875 L</lex_stuff_pvn> provides a more convenient way to insert text into
 876 the buffer.
 877
 878 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
 879 this function updates all of the lexer's variables that point directly
 880 into the buffer.
 881
 882 =cut
 883 */
 884
 885 char *
 886 Perl_lex_grow_linestr(pTHX_ STRLEN len)
 887 {
 888     SV *linestr;
 889     char *buf;
 890     STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
 891     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
 892     linestr = PL_parser->linestr;
 893     buf = SvPVX(linestr);
 894     if (len <= SvLEN(linestr))
 895         return buf;
 896     bufend_pos = PL_parser->bufend - buf;
 897     bufptr_pos = PL_parser->bufptr - buf;
 898     oldbufptr_pos = PL_parser->oldbufptr - buf;
 899     oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
 900     linestart_pos = PL_parser->linestart - buf;
 901     last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
 902     last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
 903     buf = sv_grow(linestr, len);
 904     PL_parser->bufend = buf + bufend_pos;
 905     PL_parser->bufptr = buf + bufptr_pos;
 906     PL_parser->oldbufptr = buf + oldbufptr_pos;
 907     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
 908     PL_parser->linestart = buf + linestart_pos;
 909     if (PL_parser->last_uni)
 910         PL_parser->last_uni = buf + last_uni_pos;
 911     if (PL_parser->last_lop)
 912         PL_parser->last_lop = buf + last_lop_pos;
 913     return buf;
 914 }
 915
 916 /*
 917 =for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
 918
 919 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
 920 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
 921 reallocating the buffer if necessary.  This means that lexing code that
 922 runs later will see the characters as if they had appeared in the input.
 923 It is not recommended to do this as part of normal parsing, and most
 924 uses of this facility run the risk of the inserted characters being
 925 interpreted in an unintended manner.
 926
 927 The string to be inserted is represented by I<len> octets starting
 928 at I<pv>.  These octets are interpreted as either UTF-8 or Latin-1,
 929 according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
 930 The characters are recoded for the lexer buffer, according to how the
 931 buffer is currently being interpreted (L</lex_bufutf8>).  If a string
 932 to be interpreted is available as a Perl scalar, the L</lex_stuff_sv>
 933 function is more convenient.
 934
 935 =cut
 936 */
 937
 938 void
 939 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
 940 {
 941     dVAR;
 942     char *bufptr;
 943     PERL_ARGS_ASSERT_LEX_STUFF_PVN;
 944     if (flags & ~(LEX_STUFF_UTF8))
 945         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
 946     if (UTF) {
 947         if (flags & LEX_STUFF_UTF8) {
 948             goto plain_copy;
 949         } else {
 950             STRLEN highhalf = 0;
 951             const char *p, *e = pv+len;
 952             for (p = pv; p != e; p++)
 953                 highhalf += !!(((U8)*p) & 0x80);
 954             if (!highhalf)
 955                 goto plain_copy;
 956             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
 957             bufptr = PL_parser->bufptr;
 958             Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
 959             SvCUR_set(PL_parser->linestr,
 960                 SvCUR(PL_parser->linestr) + len+highhalf);
 961             PL_parser->bufend += len+highhalf;
 962             for (p = pv; p != e; p++) {
 963                 U8 c = (U8)*p;
 964                 if (c & 0x80) {
 965                     *bufptr++ = (char)(0xc0 | (c >> 6));
 966                     *bufptr++ = (char)(0x80 | (c & 0x3f));
 967                 } else {
 968                     *bufptr++ = (char)c;
 969                 }
 970             }
 971         }
 972     } else {
 973         if (flags & LEX_STUFF_UTF8) {
 974             STRLEN highhalf = 0;
 975             const char *p, *e = pv+len;
 976             for (p = pv; p != e; p++) {
 977                 U8 c = (U8)*p;
 978                 if (c >= 0xc4) {
 979                     Perl_croak(aTHX_ "Lexing code attempted to stuff "
 980                                 "non-Latin-1 character into Latin-1 input");
 981                 } else if (c >= 0xc2 && p+1 != e &&
 982                             (((U8)p[1]) & 0xc0) == 0x80) {
 983                     p++;
 984                     highhalf++;
 985                 } else if (c >= 0x80) {
 986                     /* malformed UTF-8 */
 987                     ENTER;
 988                     SAVESPTR(PL_warnhook);
 989                     PL_warnhook = PERL_WARNHOOK_FATAL;
 990                     utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
 991                     LEAVE;
 992                 }
 993             }
 994             if (!highhalf)
 995                 goto plain_copy;
 996             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
 997             bufptr = PL_parser->bufptr;
 998             Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
 999             SvCUR_set(PL_parser->linestr,
1000                 SvCUR(PL_parser->linestr) + len-highhalf);
1001             PL_parser->bufend += len-highhalf;
1002             for (p = pv; p != e; p++) {
1003                 U8 c = (U8)*p;
1004                 if (c & 0x80) {
1005                     *bufptr++ = (char)(((c & 0x3) << 6) | (p[1] & 0x3f));
1006                     p++;
1007                 } else {
1008                     *bufptr++ = (char)c;
1009                 }
1010             }
1011         } else {
1012             plain_copy:
1013             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1014             bufptr = PL_parser->bufptr;
1015             Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1016             SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1017             PL_parser->bufend += len;
1018             Copy(pv, bufptr, len, char);
1019         }
1020     }
1021 }
1022
1023 /*
1024 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1025
1026 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1027 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1028 reallocating the buffer if necessary.  This means that lexing code that
1029 runs later will see the characters as if they had appeared in the input.
1030 It is not recommended to do this as part of normal parsing, and most
1031 uses of this facility run the risk of the inserted characters being
1032 interpreted in an unintended manner.
1033
1034 The string to be inserted is the string value of I<sv>.  The characters
1035 are recoded for the lexer buffer, according to how the buffer is currently
1036 being interpreted (L</lex_bufutf8>).  If a string to be interpreted is
1037 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1038 need to construct a scalar.
1039
1040 =cut
1041 */
1042
1043 void
1044 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1045 {
1046     char *pv;
1047     STRLEN len;
1048     PERL_ARGS_ASSERT_LEX_STUFF_SV;
1049     if (flags)
1050         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1051     pv = SvPV(sv, len);
1052     lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1053 }
1054
1055 /*
1056 =for apidoc Amx|void|lex_unstuff|char *ptr
1057
1058 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1059 I<ptr>.  Text following I<ptr> will be moved, and the buffer shortened.
1060 This hides the discarded text from any lexing code that runs later,
1061 as if the text had never appeared.
1062
1063 This is not the normal way to consume lexed text.  For that, use
1064 L</lex_read_to>.
1065
1066 =cut
1067 */
1068
1069 void
1070 Perl_lex_unstuff(pTHX_ char *ptr)
1071 {
1072     char *buf, *bufend;
1073     STRLEN unstuff_len;
1074     PERL_ARGS_ASSERT_LEX_UNSTUFF;
1075     buf = PL_parser->bufptr;
1076     if (ptr < buf)
1077         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1078     if (ptr == buf)
1079         return;
1080     bufend = PL_parser->bufend;
1081     if (ptr > bufend)
1082         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1083     unstuff_len = ptr - buf;
1084     Move(ptr, buf, bufend+1-ptr, char);
1085     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1086     PL_parser->bufend = bufend - unstuff_len;
1087 }
1088
1089 /*
1090 =for apidoc Amx|void|lex_read_to|char *ptr
1091
1092 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1093 to I<ptr>.  This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1094 performing the correct bookkeeping whenever a newline character is passed.
1095 This is the normal way to consume lexed text.
1096
1097 Interpretation of the buffer's octets can be abstracted out by
1098 using the slightly higher-level functions L</lex_peek_unichar> and
1099 L</lex_read_unichar>.
1100
1101 =cut
1102 */
1103
1104 void
1105 Perl_lex_read_to(pTHX_ char *ptr)
1106 {
1107     char *s;
1108     PERL_ARGS_ASSERT_LEX_READ_TO;
1109     s = PL_parser->bufptr;
1110     if (ptr < s || ptr > PL_parser->bufend)
1111         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1112     for (; s != ptr; s++)
1113         if (*s == '\n') {
1114             CopLINE_inc(PL_curcop);
1115             PL_parser->linestart = s+1;
1116         }
1117     PL_parser->bufptr = ptr;
1118 }
1119
1120 /*
1121 =for apidoc Amx|void|lex_discard_to|char *ptr
1122
1123 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1124 up to I<ptr>.  The remaining content of the buffer will be moved, and
1125 all pointers into the buffer updated appropriately.  I<ptr> must not
1126 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1127 it is not permitted to discard text that has yet to be lexed.
1128
1129 Normally it is not necessary to do this directly, because it suffices to
1130 use the implicit discarding behaviour of L</lex_next_chunk> and things
1131 based on it.  However, if a token stretches across multiple lines,
1132 and the lexing code has kept multiple lines of text in the buffer for
1133 that purpose, then after completion of the token it would be wise to
1134 explicitly discard the now-unneeded earlier lines, to avoid future
1135 multi-line tokens growing the buffer without bound.
1136
1137 =cut
1138 */
1139
1140 void
1141 Perl_lex_discard_to(pTHX_ char *ptr)
1142 {
1143     char *buf;
1144     STRLEN discard_len;
1145     PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1146     buf = SvPVX(PL_parser->linestr);
1147     if (ptr < buf)
1148         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1149     if (ptr == buf)
1150         return;
1151     if (ptr > PL_parser->bufptr)
1152         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1153     discard_len = ptr - buf;
1154     if (PL_parser->oldbufptr < ptr)
1155         PL_parser->oldbufptr = ptr;
1156     if (PL_parser->oldoldbufptr < ptr)
1157         PL_parser->oldoldbufptr = ptr;
1158     if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1159         PL_parser->last_uni = NULL;
1160     if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1161         PL_parser->last_lop = NULL;
1162     Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1163     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1164     PL_parser->bufend -= discard_len;
1165     PL_parser->bufptr -= discard_len;
1166     PL_parser->oldbufptr -= discard_len;
1167     PL_parser->oldoldbufptr -= discard_len;
1168     if (PL_parser->last_uni)
1169         PL_parser->last_uni -= discard_len;
1170     if (PL_parser->last_lop)
1171         PL_parser->last_lop -= discard_len;
1172 }
1173
1174 /*
1175 =for apidoc Amx|bool|lex_next_chunk|U32 flags
1176
1177 Reads in the next chunk of text to be lexed, appending it to
1178 L</PL_parser-E<gt>linestr>.  This should be called when lexing code has
1179 looked to the end of the current chunk and wants to know more.  It is
1180 usual, but not necessary, for lexing to have consumed the entirety of
1181 the current chunk at this time.
1182
1183 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1184 chunk (i.e., the current chunk has been entirely consumed), normally the
1185 current chunk will be discarded at the same time that the new chunk is
1186 read in.  If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1187 will not be discarded.  If the current chunk has not been entirely
1188 consumed, then it will not be discarded regardless of the flag.
1189
1190 Returns true if some new text was added to the buffer, or false if the
1191 buffer has reached the end of the input text.
1192
1193 =cut
1194 */
1195
1196 #define LEX_FAKE_EOF 0x80000000
1197
1198 bool
1199 Perl_lex_next_chunk(pTHX_ U32 flags)
1200 {
1201     SV *linestr;
1202     char *buf;
1203     STRLEN old_bufend_pos, new_bufend_pos;
1204     STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1205     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1206     bool got_some_for_debugger = 0;
1207     bool got_some;
1208     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF))
1209         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1210     linestr = PL_parser->linestr;
1211     buf = SvPVX(linestr);
1212     if (!(flags & LEX_KEEP_PREVIOUS) &&
1213             PL_parser->bufptr == PL_parser->bufend) {
1214         old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1215         linestart_pos = 0;
1216         if (PL_parser->last_uni != PL_parser->bufend)
1217             PL_parser->last_uni = NULL;
1218         if (PL_parser->last_lop != PL_parser->bufend)
1219             PL_parser->last_lop = NULL;
1220         last_uni_pos = last_lop_pos = 0;
1221         *buf = 0;
1222         SvCUR(linestr) = 0;
1223     } else {
1224         old_bufend_pos = PL_parser->bufend - buf;
1225         bufptr_pos = PL_parser->bufptr - buf;
1226         oldbufptr_pos = PL_parser->oldbufptr - buf;
1227         oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1228         linestart_pos = PL_parser->linestart - buf;
1229         last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1230         last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1231     }
1232     if (flags & LEX_FAKE_EOF) {
1233         goto eof;
1234     } else if (!PL_parser->rsfp) {
1235         got_some = 0;
1236     } else if (filter_gets(linestr, old_bufend_pos)) {
1237         got_some = 1;
1238         got_some_for_debugger = 1;
1239     } else {
1240         if (!SvPOK(linestr))   /* can get undefined by filter_gets */
1241             sv_setpvs(linestr, "");
1242         eof:
1243         /* End of real input.  Close filehandle (unless it was STDIN),
1244          * then add implicit termination.
1245          */
1246         if ((PerlIO*)PL_parser->rsfp == PerlIO_stdin())
1247             PerlIO_clearerr(PL_parser->rsfp);
1248         else if (PL_parser->rsfp)
1249             (void)PerlIO_close(PL_parser->rsfp);
1250         PL_parser->rsfp = NULL;
1251         PL_doextract = FALSE;
1252 #ifdef PERL_MAD
1253         if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1254             PL_faketokens = 1;
1255 #endif
1256         if (!PL_in_eval && PL_minus_p) {
1257             sv_catpvs(linestr,
1258                 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1259             PL_minus_n = PL_minus_p = 0;
1260         } else if (!PL_in_eval && PL_minus_n) {
1261             sv_catpvs(linestr, /*{*/";}");
1262             PL_minus_n = 0;
1263         } else
1264             sv_catpvs(linestr, ";");
1265         got_some = 1;
1266     }
1267     buf = SvPVX(linestr);
1268     new_bufend_pos = SvCUR(linestr);
1269     PL_parser->bufend = buf + new_bufend_pos;
1270     PL_parser->bufptr = buf + bufptr_pos;
1271     PL_parser->oldbufptr = buf + oldbufptr_pos;
1272     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1273     PL_parser->linestart = buf + linestart_pos;
1274     if (PL_parser->last_uni)
1275         PL_parser->last_uni = buf + last_uni_pos;
1276     if (PL_parser->last_lop)
1277         PL_parser->last_lop = buf + last_lop_pos;
1278     if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
1279             PL_curstash != PL_debstash) {
1280         /* debugger active and we're not compiling the debugger code,
1281          * so store the line into the debugger's array of lines
1282          */
1283         update_debugger_info(NULL, buf+old_bufend_pos,
1284             new_bufend_pos-old_bufend_pos);
1285     }
1286     return got_some;
1287 }
1288
1289 /*
1290 =for apidoc Amx|I32|lex_peek_unichar|U32 flags
1291
1292 Looks ahead one (Unicode) character in the text currently being lexed.
1293 Returns the codepoint (unsigned integer value) of the next character,
1294 or -1 if lexing has reached the end of the input text.  To consume the
1295 peeked character, use L</lex_read_unichar>.
1296
1297 If the next character is in (or extends into) the next chunk of input
1298 text, the next chunk will be read in.  Normally the current chunk will be
1299 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1300 then the current chunk will not be discarded.
1301
1302 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1303 is encountered, an exception is generated.
1304
1305 =cut
1306 */
1307
1308 I32
1309 Perl_lex_peek_unichar(pTHX_ U32 flags)
1310 {
1311     dVAR;
1312     char *s, *bufend;
1313     if (flags & ~(LEX_KEEP_PREVIOUS))
1314         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1315     s = PL_parser->bufptr;
1316     bufend = PL_parser->bufend;
1317     if (UTF) {
1318         U8 head;
1319         I32 unichar;
1320         STRLEN len, retlen;
1321         if (s == bufend) {
1322             if (!lex_next_chunk(flags))
1323                 return -1;
1324             s = PL_parser->bufptr;
1325             bufend = PL_parser->bufend;
1326         }
1327         head = (U8)*s;
1328         if (!(head & 0x80))
1329             return head;
1330         if (head & 0x40) {
1331             len = PL_utf8skip[head];
1332             while ((STRLEN)(bufend-s) < len) {
1333                 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1334                     break;
1335                 s = PL_parser->bufptr;
1336                 bufend = PL_parser->bufend;
1337             }
1338         }
1339         unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1340         if (retlen == (STRLEN)-1) {
1341             /* malformed UTF-8 */
1342             ENTER;
1343             SAVESPTR(PL_warnhook);
1344             PL_warnhook = PERL_WARNHOOK_FATAL;
1345             utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1346             LEAVE;
1347         }
1348         return unichar;
1349     } else {
1350         if (s == bufend) {
1351             if (!lex_next_chunk(flags))
1352                 return -1;
1353             s = PL_parser->bufptr;
1354         }
1355         return (U8)*s;
1356     }
1357 }
1358
1359 /*
1360 =for apidoc Amx|I32|lex_read_unichar|U32 flags
1361
1362 Reads the next (Unicode) character in the text currently being lexed.
1363 Returns the codepoint (unsigned integer value) of the character read,
1364 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1365 if lexing has reached the end of the input text.  To non-destructively
1366 examine the next character, use L</lex_peek_unichar> instead.
1367
1368 If the next character is in (or extends into) the next chunk of input
1369 text, the next chunk will be read in.  Normally the current chunk will be
1370 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1371 then the current chunk will not be discarded.
1372
1373 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1374 is encountered, an exception is generated.
1375
1376 =cut
1377 */
1378
1379 I32
1380 Perl_lex_read_unichar(pTHX_ U32 flags)
1381 {
1382     I32 c;
1383     if (flags & ~(LEX_KEEP_PREVIOUS))
1384         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1385     c = lex_peek_unichar(flags);
1386     if (c != -1) {
1387         if (c == '\n')
1388             CopLINE_inc(PL_curcop);
1389         PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1390     }
1391     return c;
1392 }
1393
1394 /*
1395 =for apidoc Amx|void|lex_read_space|U32 flags
1396
1397 Reads optional spaces, in Perl style, in the text currently being
1398 lexed.  The spaces may include ordinary whitespace characters and
1399 Perl-style comments.  C<#line> directives are processed if encountered.
1400 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1401 at a non-space character (or the end of the input text).
1402
1403 If spaces extend into the next chunk of input text, the next chunk will
1404 be read in.  Normally the current chunk will be discarded at the same
1405 time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1406 chunk will not be discarded.
1407
1408 =cut
1409 */
1410
1411 #define LEX_NO_NEXT_CHUNK 0x80000000
1412
1413 void
1414 Perl_lex_read_space(pTHX_ U32 flags)
1415 {
1416     char *s, *bufend;
1417     bool need_incline = 0;
1418     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK))
1419         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1420 #ifdef PERL_MAD
1421     if (PL_skipwhite) {
1422         sv_free(PL_skipwhite);
1423         PL_skipwhite = NULL;
1424     }
1425     if (PL_madskills)
1426         PL_skipwhite = newSVpvs("");
1427 #endif /* PERL_MAD */
1428     s = PL_parser->bufptr;
1429     bufend = PL_parser->bufend;
1430     while (1) {
1431         char c = *s;
1432         if (c == '#') {
1433             do {
1434                 c = *++s;
1435             } while (!(c == '\n' || (c == 0 && s == bufend)));
1436         } else if (c == '\n') {
1437             s++;
1438             PL_parser->linestart = s;
1439             if (s == bufend)
1440                 need_incline = 1;
1441             else
1442                 incline(s);
1443         } else if (isSPACE(c)) {
1444             s++;
1445         } else if (c == 0 && s == bufend) {
1446             bool got_more;
1447 #ifdef PERL_MAD
1448             if (PL_madskills)
1449                 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1450 #endif /* PERL_MAD */
1451             if (flags & LEX_NO_NEXT_CHUNK)
1452                 break;
1453             PL_parser->bufptr = s;
1454             CopLINE_inc(PL_curcop);
1455             got_more = lex_next_chunk(flags);
1456             CopLINE_dec(PL_curcop);
1457             s = PL_parser->bufptr;
1458             bufend = PL_parser->bufend;
1459             if (!got_more)
1460                 break;
1461             if (need_incline && PL_parser->rsfp) {
1462                 incline(s);
1463                 need_incline = 0;
1464             }
1465         } else {
1466             break;
1467         }
1468     }
1469 #ifdef PERL_MAD
1470     if (PL_madskills)
1471         sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1472 #endif /* PERL_MAD */
1473     PL_parser->bufptr = s;
1474 }
1475
1476 /*
1477  * S_incline
1478  * This subroutine has nothing to do with tilting, whether at windmills
1479  * or pinball tables.  Its name is short for "increment line".  It
1480  * increments the current line number in CopLINE(PL_curcop) and checks
1481  * to see whether the line starts with a comment of the form
1482  *    # line 500 "foo.pm"
1483  * If so, it sets the current line number and file to the values in the comment.
1484  */
1485
1486 STATIC void
1487 S_incline(pTHX_ const char *s)
1488 {
1489     dVAR;
1490     const char *t;
1491     const char *n;
1492     const char *e;
1493
1494     PERL_ARGS_ASSERT_INCLINE;
1495
1496     CopLINE_inc(PL_curcop);
1497     if (*s++ != '#')
1498         return;
1499     while (SPACE_OR_TAB(*s))
1500         s++;
1501     if (strnEQ(s, "line", 4))
1502         s += 4;
1503     else
1504         return;
1505     if (SPACE_OR_TAB(*s))
1506         s++;
1507     else
1508         return;
1509     while (SPACE_OR_TAB(*s))
1510         s++;
1511     if (!isDIGIT(*s))
1512         return;
1513
1514     n = s;
1515     while (isDIGIT(*s))
1516         s++;
1517     if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1518         return;
1519     while (SPACE_OR_TAB(*s))
1520         s++;
1521     if (*s == '"' && (t = strchr(s+1, '"'))) {
1522         s++;
1523         e = t + 1;
1524     }
1525     else {
1526         t = s;
1527         while (!isSPACE(*t))
1528             t++;
1529         e = t;
1530     }
1531     while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1532         e++;
1533     if (*e != '\n' && *e != '\0')
1534         return;         /* false alarm */
1535
1536     if (t - s > 0) {
1537         const STRLEN len = t - s;
1538 #ifndef USE_ITHREADS
1539         SV *const temp_sv = CopFILESV(PL_curcop);
1540         const char *cf;
1541         STRLEN tmplen;
1542
1543         if (temp_sv) {
1544             cf = SvPVX(temp_sv);
1545             tmplen = SvCUR(temp_sv);
1546         } else {
1547             cf = NULL;
1548             tmplen = 0;
1549         }
1550
1551         if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
1552             /* must copy *{"::_<(eval N)[oldfilename:L]"}
1553              * to *{"::_<newfilename"} */
1554             /* However, the long form of evals is only turned on by the
1555                debugger - usually they're "(eval %lu)" */
1556             char smallbuf[128];
1557             char *tmpbuf;
1558             GV **gvp;
1559             STRLEN tmplen2 = len;
1560             if (tmplen + 2 <= sizeof smallbuf)
1561                 tmpbuf = smallbuf;
1562             else
1563                 Newx(tmpbuf, tmplen + 2, char);
1564             tmpbuf[0] = '_';
1565             tmpbuf[1] = '<';
1566             memcpy(tmpbuf + 2, cf, tmplen);
1567             tmplen += 2;
1568             gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
1569             if (gvp) {
1570                 char *tmpbuf2;
1571                 GV *gv2;
1572
1573                 if (tmplen2 + 2 <= sizeof smallbuf)
1574                     tmpbuf2 = smallbuf;
1575                 else
1576                     Newx(tmpbuf2, tmplen2 + 2, char);
1577
1578                 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
1579                     /* Either they malloc'd it, or we malloc'd it,
1580                        so no prefix is present in ours.  */
1581                     tmpbuf2[0] = '_';
1582                     tmpbuf2[1] = '<';
1583                 }
1584
1585                 memcpy(tmpbuf2 + 2, s, tmplen2);
1586                 tmplen2 += 2;
1587
1588                 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1589                 if (!isGV(gv2)) {
1590                     gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1591                     /* adjust ${"::_<newfilename"} to store the new file name */
1592                     GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1593                     GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
1594                     GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
1595                 }
1596
1597                 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1598             }
1599             if (tmpbuf != smallbuf) Safefree(tmpbuf);
1600         }
1601 #endif
1602         CopFILE_free(PL_curcop);
1603         CopFILE_setn(PL_curcop, s, len);
1604     }
1605     CopLINE_set(PL_curcop, atoi(n)-1);
1606 }
1607
1608 #ifdef PERL_MAD
1609 /* S_skipspace0: skip whitespace that precedes PL_thistoken.  Under MAD the
1610  * text gathered by skipspace() is appended to PL_thiswhite. */
1611 STATIC char *
1612 S_skipspace0(pTHX_ register char *s)
1613 {
1614     PERL_ARGS_ASSERT_SKIPSPACE0;
1615
1616     s = skipspace(s);
1617     if (PL_madskills) {
1618         if (PL_skipwhite) {
1619             if (PL_thiswhite == NULL)
1620                 PL_thiswhite = newSVpvs("");
1621             sv_catsv(PL_thiswhite, PL_skipwhite);
1622             sv_free(PL_skipwhite);
1623             PL_skipwhite = NULL;
1624         }
1625         PL_realtokenstart = s - SvPVX(PL_linestr);
1626     }
1627     return s;
1628 }
1629
1630 /* S_skipspace1: skip whitespace that follows PL_thistoken.  Positions are
1631  * kept as offsets into PL_linestr and rebuilt after skipspace() returns. */
1632 STATIC char *
1633 S_skipspace1(pTHX_ register char *s)
1634 {
1635     const I32 tok_end_off = s - SvPVX(PL_linestr);
1636
1637     PERL_ARGS_ASSERT_SKIPSPACE1;
1638
1639     s = skipspace(s);
1640     if (PL_madskills) {
1641         const char * const tok_end = SvPVX(PL_linestr) + tok_end_off;
1642         /* record the token text if PL_thistoken is still unset */
1643         if (PL_thistoken == NULL && PL_realtokenstart >= 0) {
1644             const char * const tok_begin = SvPVX(PL_linestr) + PL_realtokenstart;
1645             PL_thistoken = newSVpvn(tok_begin, tok_end - tok_begin);
1646         }
1647         PL_realtokenstart = -1;
1648         if (PL_skipwhite) {
1649             if (PL_nextwhite == NULL)
1650                 PL_nextwhite = newSVpvs("");
1651             sv_catsv(PL_nextwhite, PL_skipwhite);
1652             sv_free(PL_skipwhite);
1653             PL_skipwhite = NULL;
1654         }
1655     }
1656     return s;
1657 }
1658
1659 /* S_skipspace2: skip whitespace while keeping PL_bufptr at the same offset
1660  * in PL_linestr; under MAD, with svp given, the skipped text goes to *svp. */
1661 STATIC char *
1662 S_skipspace2(pTHX_ register char *s, SV **svp)
1663 {
1664     const I32 saved_bufptr_off = PL_bufptr - SvPVX(PL_linestr);
1665     const I32 tok_end_off = s - SvPVX(PL_linestr);
1666
1667     PERL_ARGS_ASSERT_SKIPSPACE2;
1668
1669     s = skipspace(s);
1670     PL_bufptr = SvPVX(PL_linestr) + saved_bufptr_off;
1671     if (PL_madskills && svp) {
1672         char * const tok_end = SvPVX(PL_linestr) + tok_end_off;
1673         if (PL_thistoken == NULL && PL_realtokenstart >= 0) {
1674             char * const tok_begin = SvPVX(PL_linestr) + PL_realtokenstart;
1675             PL_thistoken = newSVpvn(tok_begin, tok_end - tok_begin);
1676             PL_realtokenstart = -1;
1677         }
1678         if (PL_skipwhite) {
1679             if (*svp == NULL)
1680                 *svp = newSVpvs("");
1681             sv_setsv(*svp, PL_skipwhite);
1682             sv_free(PL_skipwhite);
1683             PL_skipwhite = NULL;
1684         }
1685     }
1686     return s;
1687 }
1688 #endif
1689
1690 /* Save the current source line (orig_sv, else buf/len) for the debugger. */
1691 STATIC void
1692 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1693 {
1694     AV * const av = CopFILEAVx(PL_curcop);     /* line array of PL_curcop's file */
1695     if (av) {
1696         SV * const sv = newSV_type(SVt_PVMG);
1697         if (orig_sv) sv_setsv(sv, orig_sv);
1698         else         sv_setpvn(sv, buf, len);
1699         (void)SvIOK_on(sv);
1700         SvIV_set(sv, 0);
1701         if (!av_store(av, (I32)CopLINE(PL_curcop), sv))   /* not kept by av: */
1702             SvREFCNT_dec(sv);                             /* drop our reference */
1703     }
1704 }
1705
1706 /*
1707  * S_skipspace
1708  * Advance past whitespace (and comments) from s; returns the new position.
1709  * When PL_lex_formbrack is set and PL_lex_brackets does not exceed it, only
1710  * spaces and tabs up to PL_bufend are skipped; otherwise lex_read_space().
1711  */
1712 STATIC char *
1713 S_skipspace(pTHX_ register char *s)
1714 {
1715 #ifdef PERL_MAD
1716     char * const ws_begin = s;
1717 #endif /* PERL_MAD */
1718     PERL_ARGS_ASSERT_SKIPSPACE;
1719 #ifdef PERL_MAD
1720     if (PL_skipwhite) {         /* drop what an earlier call left behind */
1721         sv_free(PL_skipwhite);
1722         PL_skipwhite = NULL;
1723     }
1724 #endif /* PERL_MAD */
1725     if (!PL_lex_formbrack || PL_lex_brackets > PL_lex_formbrack) {
1726         /* lend s to lex_read_space() via PL_bufptr, then restore by offset */
1727         const STRLEN saved_off = PL_bufptr - SvPVX(PL_linestr);
1728         PL_bufptr = s;
1729         lex_read_space(LEX_KEEP_PREVIOUS |
1730                 ((PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE)
1731                     ? LEX_NO_NEXT_CHUNK : 0));
1732         s = PL_bufptr;
1733         PL_bufptr = SvPVX(PL_linestr) + saved_off;
1734         if (PL_bufptr < PL_linestart)
1735             PL_bufptr = PL_linestart;
1736         return s;
1737     }
1738     while (s < PL_bufend && SPACE_OR_TAB(*s))
1739         ++s;
1740 #ifdef PERL_MAD
1741     if (PL_madskills)
1742         PL_skipwhite = newSVpvn(ws_begin, s - ws_begin);
1743 #endif /* PERL_MAD */
1744     return s;
1745 }
1746
1747 /*
1748  * S_check_uni
1749  * Warn when a named unary operator is used without parentheses in a way
1750  * that reads ambiguously, for example:
1751  *     rand + 5
1752  * This doesn't mean rand() + 5: the +5 is the operator's argument.  No
1753  * warning is given when a '(' is found before PL_bufptr.
1754  */
1755
1756 STATIC void
1757 S_check_uni(pTHX)
1758 {
1759     dVAR;
1760     const char *name_end;
1761     const char *paren;
1762
1763     if (PL_last_uni == PL_oldoldbufptr) {
1764         for (; isSPACE(*PL_last_uni); PL_last_uni++)
1765             ;                   /* step over blanks before the name */
1766         name_end = PL_last_uni;
1767         while (isALNUM_lazy_if(name_end,UTF) || *name_end == '-')
1768             name_end++;
1769         paren = strchr(name_end, '(');
1770         if (paren == NULL || paren >= PL_bufptr) {
1771             Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1772                      "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1773                      (int)(name_end - PL_last_uni), PL_last_uni);
1774         }
1775     }
1776 }
1777
1778 /*
1779  * LOP : macro to build a list operator.  The work is done by the subroutine
1780  * S_lop(); LOP(f,x) expands to a return of lop(f,x,s), using the caller's s.
1781  */
1782
1783 #define LOP(f,x) return lop(f,x,s)
1784
1785 /*
1786  * S_lop
1787  * Finish lexing a possible list operator and choose the token to return:
1788  *  - LSTOP when a forced token is already queued
1789  *  - FUNC when the next non-space character is an opening paren
1790  *  - LSTOP in every other case
1791  * f is also recorded in pl_yylval.ival and PL_last_lop_op; x becomes PL_expect.
1792  */
1793 STATIC I32
1794 S_lop(pTHX_ I32 f, int x, char *s)
1795 {
1796     dVAR;
1797
1798     PERL_ARGS_ASSERT_LOP;
1799
1800     pl_yylval.ival = f;
1801     CLINE;
1802     PL_expect = x;
1803     PL_bufptr = s;
1804     PL_last_lop = PL_oldbufptr;
1805     PL_last_lop_op = (OPCODE)f;
1806
1807 #ifdef PERL_MAD
1808     if (PL_lasttoke)
1809 #else
1810     if (PL_nexttoke)
1811 #endif
1812         return REPORT(LSTOP);
1813
1814     if (*s != '(') {
1815         s = PEEKSPACE(s);       /* the paren may follow some whitespace */
1816         if (*s != '(')
1817             return REPORT(LSTOP);
1818     }
1819     return REPORT(FUNC);
1820 }
1821
1822 #ifdef PERL_MAD
1823 /*
1824  * S_start_force
1825  * Opens a slot in PL_nexttoke[] for an eventual force_next().  Entries from
1826  * `where' upwards move up by one, so start_force(0) basically does an
1827  * unshift, while start_force(-1) does a push.  yylex removes items on
1828  * the "pop" end.
1829  */
1830 STATIC void
1831 S_start_force(pTHX_ int where)
1832 {
1833     if (where < 0)      /* so people can duplicate start_force(PL_curforce) */
1834         where = PL_lasttoke;
1835     assert(PL_curforce < 0 || PL_curforce == where);
1836     if (PL_curforce != where) {
1837         int slot = PL_lasttoke;
1838         while (slot > where) {          /* slide later entries up by one */
1839             PL_nexttoke[slot] = PL_nexttoke[slot-1];
1840             slot--;
1841         }
1842         PL_lasttoke++;
1843     }
1844     if (PL_curforce < 0)        /* in case of duplicate start_force() */
1845         Zero(&PL_nexttoke[where], 1, NEXTTOKE);
1846     PL_curforce = where;
1847     if (PL_nextwhite) {
1848         if (PL_madskills)
1849             curmad('^', newSVpvs(""));
1850         CURMAD('_', PL_nextwhite);
1851     }
1852 }
1853
1854 /* S_curmad: attach sv under key `slot' to the MAD list of the token being
1855  * forced (PL_nexttoke[PL_curforce]) or, if there is none, to PL_thismad. */
1856 STATIC void
1857 S_curmad(pTHX_ char slot, SV *sv)
1858 {
1859     MADPROP **listp;
1860
1861     if (sv == NULL)
1862         return;
1863     listp = (PL_curforce < 0) ? &PL_thismad
1864                               : &PL_nexttoke[PL_curforce].next_mad;
1865
1866     if (PL_faketokens) {
1867         sv_setpvs(sv, "");
1868     }
1869     else if (!IN_BYTES) {
1870         if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1871             SvUTF8_on(sv);
1872         else if (PL_encoding)
1873             sv_recode_to_utf8(sv, PL_encoding);
1874     }
1875
1876     /* a head entry keyed '^' is overwritten in place instead of appended to */
1877     if (slot != '_' && *listp && (*listp)->mad_key == '^') {
1878         MADPROP * const head = *listp;
1879         head->mad_key = slot;
1880         sv_free(MUTABLE_SV(head->mad_val));
1881         head->mad_val = (void*)sv;
1882     }
1883     else {
1884         addmad(newMADsv(slot, sv), listp, 0);
1885     }
1886 }
1887 #else
1888 #  define start_force(where)    NOOP
1889 #  define curmad(slot, sv)      NOOP
1890 #endif
1891
1892 /*
1893  * S_force_next
1894  * Queue `type' as the token the lexer will hand out on its next call, for
1895  * use when the lexer already knows what comes next (for instance, when it
1896  * is reordering tokens for the parser).  The caller must have filled in
1897  * PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD), and may need to
1898  * set PL_expect so that the token is handled correctly.
1899  */
1900
1901 STATIC void
1902 S_force_next(pTHX_ I32 type)
1903 {
1904     dVAR;
1905 #ifdef DEBUGGING
1906     if (DEBUG_T_TEST) {
1907         PerlIO_printf(Perl_debug_log, "### forced token:\n");
1908         tokereport(type, &NEXTVAL_NEXTTOKE);
1909     }
1910 #endif
1911 #ifdef PERL_MAD
1912     if (PL_curforce < 0)        /* no slot opened yet: append one */
1913         start_force(PL_lasttoke);
1914     PL_nexttoke[PL_curforce].next_type = type;
1915     PL_curforce = -1;
1916     if (PL_lex_state != LEX_KNOWNEXT)
1917         PL_lex_defer = PL_lex_state;    /* keep the interrupted state */
1918     PL_lex_expect = PL_expect;
1919     PL_lex_state = LEX_KNOWNEXT;
1920 #else
1921     PL_nexttype[PL_nexttoke++] = type;
1922     /* state and expectation are saved only when not already in KNOWNEXT */
1923     if (PL_lex_state != LEX_KNOWNEXT) {
1924         PL_lex_expect = PL_expect;
1925         PL_lex_defer = PL_lex_state;
1926         PL_lex_state = LEX_KNOWNEXT;
1927     }
1928 #endif
1929 }
1930
1931 /* New SV holding start[0..len); marked UTF-8 only for valid non-ASCII UTF-8
1932  * text when UTF is set and IN_BYTES is not in effect. */
1933 STATIC SV *
1934 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
1935 {
1936     dVAR;
1937     const U8 * const bytes = (const U8*)start;
1938     const bool want_utf8 = !IN_BYTES && UTF
1939         && !is_ascii_string(bytes, len) && is_utf8_string(bytes, len);
1940     return newSVpvn_utf8(start, len, want_utf8);
1941 }
1942
1943 /*
1944  * S_force_word
1945  * When the lexer knows the next thing is a word (for instance, it has
1946  * just seen -> and it knows that the next char is a word char), it
1947  * calls S_force_word to stick the next word into the PL_nexttoke/val
1948  * lookahead as a bareword constant.  Returns the position where scanning
1949  * stopped; nothing is forced if no word starts at `start'.
1950  *
1951  * Arguments:
1952  *   char *start : buffer position (must be within PL_linestr)
1953  *   int token   : PL_next* will be this type of bare word (e.g., METHOD,WORD)
1954  *   int check_keyword : if true, a word that is a keyword is not forced
1955  *       (do this if the word is a label, e.g. goto FOO)
1956  *   int allow_pack : if true, : characters will also be allowed (require,
1957  *       use, etc. do this)
1958  *   int allow_initial_tick : used by the "sub" lexer only.
1959  */
1960 STATIC char *
1961 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
1962 {
1963     dVAR;
1964     register char *s;
1965     STRLEN len;
1966
1967     PERL_ARGS_ASSERT_FORCE_WORD;
1968
1969     start = SKIPSPACE1(start);
1970     s = start;
1971     if (!(isIDFIRST_lazy_if(s,UTF)
1972           || (allow_pack && *s == ':')
1973           || (allow_initial_tick && *s == '\'')))
1974         return s;               /* no word here */
1975
1976     s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
1977     if (check_keyword && keyword(PL_tokenbuf, len, 0))
1978         return start;
1979
1980     start_force(PL_curforce);
1981     if (PL_madskills)
1982         curmad('X', newSVpvn(start,s-start));
1983
1984     if (token == METHOD) {
1985         s = SKIPSPACE1(s);
1986         PL_expect = (*s == '(') ? XTERM : XOPERATOR;
1987     }
1988     if (PL_madskills)
1989         curmad('g', newSVpvs( "forced" ));
1990
1991     /* queue the word as a bareword OP_CONST */
1992     NEXTVAL_NEXTTOKE.opval
1993         = (OP*)newSVOP(OP_CONST,0,
1994                        S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
1995     NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
1996     force_next(token);
1997     return s;
1998 }
1999
2000 /*
2001  * S_force_ident
2002  * Called when the lexer wants $foo *foo &foo etc, but the program
2003  * text only contains the "foo" portion.  `s' points to the NUL-terminated
2004  * "foo" and `kind' is the type symbol to prefix, or 0.  Forces the next
2005  * token to be a "WORD" (nothing happens for an empty name).  With a
2006  * non-zero kind the symbol is looked up, and added, via gv_fetchpvn_flags().
2007  */
2008 STATIC void
2009 S_force_ident(pTHX_ register const char *s, int kind)
2010 {
2011     dVAR;
2012     STRLEN len;
2013     OP *o;
2014
2015     PERL_ARGS_ASSERT_FORCE_IDENT;
2016     if (*s == '\0')
2017         return;
2018     len = strlen(s);
2019     o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
2020     start_force(PL_curforce);
2021     NEXTVAL_NEXTTOKE.opval = o;
2022     force_next(WORD);
2023     if (kind) {
2024         svtype want;            /* SV type selected by the sigil */
2025         switch (kind) {
2026         case '$': want = SVt_PV;   break;
2027         case '@': want = SVt_PVAV; break;
2028         case '%': want = SVt_PVHV; break;
2029         default:  want = SVt_PVGV; break;
2030         }
2031         o->op_private = OPpCONST_ENTERED;
2032         /* XXX see note in pp_entereval() for why we forgo typo warnings
2033            if the symbol must be introduced in an eval.  GSAR 96-10-12 */
2034         gv_fetchpvn_flags(s, len,
2035             PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD, want);
2036     }
2037 }
2038
2039 /* Convert the characters of sv (code points if it is UTF-8, else bytes) to
2040  * an NV: the first counts as units, each later one as 1/1000 of the one before. */
2041 NV
2042 Perl_str_to_version(pTHX_ SV *sv)
2043 {
2044     NV retval = 0.0;
2045     NV nshift = 1.0;
2046     STRLEN len;
2047     const char *start = SvPV_const(sv,len);
2048     const char * const end = start + len;
2049     const bool utf = SvUTF8(sv) ? TRUE : FALSE;
2050     PERL_ARGS_ASSERT_STR_TO_VERSION;
2051
2052     while (start < end) {
2053         STRLEN skip = 1;
2054         UV n;
2055         /* decode from the unread bytes only: the limit is end - start, not len */
2056         if (utf)
2057             n = utf8n_to_uvchr((U8*)start, (STRLEN)(end - start), &skip, 0);
2058         else
2059             n = *(U8*)start;
2060         retval += ((NV)n)/nshift;
2061         start += skip;
2062         nshift *= 1000;
2063     }
2064     return retval;
2065 }
2066
2067 /*
2068  * S_force_version
2069  * Forces the next token to be a version number.
2070  * If the next token appears to be an invalid version number, (e.g. "v2b"),
2071  * and if "guessing" is TRUE, then no new token is created (and the caller
2072  * must use an alternative parsing method).  Otherwise a WORD token is
2073  * forced whose op is the version constant, or NULL if no version was seen.
2074  */
2075 STATIC char *
2076 S_force_version(pTHX_ char *s, int guessing)
2077 {
2078     dVAR;
2079     OP *version = NULL;
2080     char *d;
2081 #ifdef PERL_MAD
2082     I32 startoff = s - SvPVX(PL_linestr);
2083 #endif
2084
2085     PERL_ARGS_ASSERT_FORCE_VERSION;
2086
2087     s = SKIPSPACE1(s);
2088     d = s;
2089     if (*d == 'v')
2090         d++;
2091     if (isDIGIT(*d)) {
2092         while (isDIGIT(*d) || *d == '_' || *d == '.')
2093             d++;
2094 #ifdef PERL_MAD
2095         if (PL_madskills) {
2096             start_force(PL_curforce);
2097             curmad('X', newSVpvn(s,d-s));
2098         }
2099 #endif
2100         if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2101             SV *ver;
2102 #ifdef USE_LOCALE_NUMERIC
2103             /* setlocale() returns the NEW locale: save a copy of the old one first */
2104             char *loc = savepv(setlocale(LC_NUMERIC, NULL));
2105             setlocale(LC_NUMERIC, "C");
2106 #endif
2107             s = scan_num(s, &pl_yylval);
2108 #ifdef USE_LOCALE_NUMERIC
2109             setlocale(LC_NUMERIC, loc);
2110             Safefree(loc);
2111 #endif
2112             version = pl_yylval.opval;
2113             ver = cSVOPx(version)->op_sv;
2114             if (SvPOK(ver) && !SvNIOK(ver)) {
2115                 SvUPGRADE(ver, SVt_PVNV);
2116                 SvNV_set(ver, str_to_version(ver));
2117                 SvNOK_on(ver);          /* hint that it is a version */
2118             }
2119         }
2120         else if (guessing) {
2121 #ifdef PERL_MAD
2122             if (PL_madskills) {
2123                 sv_free(PL_nextwhite);  /* let next token collect whitespace */
2124                 PL_nextwhite = 0;
2125                 s = SvPVX(PL_linestr) + startoff;
2126             }
2127 #endif
2128             return s;
2129         }
2130     }
2131 #ifdef PERL_MAD
2132     if (PL_madskills && !version) {
2133         sv_free(PL_nextwhite);  /* let next token collect whitespace */
2134         PL_nextwhite = 0;
2135         s = SvPVX(PL_linestr) + startoff;
2136     }
2137 #endif
2138     /* NOTE: The parser sees the package name and the VERSION swapped */
2139     start_force(PL_curforce);
2140     NEXTVAL_NEXTTOKE.opval = version;
2141     force_next(WORD);
2142     return s;
2143 }
2144
2145 /*
2146  * S_force_strict_version
2147  * Forces the next token to be a version number using strict syntax rules.
2148  * If none is found and the text (after whitespace) does not start with ';',
2149  * '{' or '}', an error from the check is reported and nothing is forced.
2150  */
2151 STATIC char *
2152 S_force_strict_version(pTHX_ char *s)
2153 {
2154     dVAR;
2155     OP *version = NULL;
2156 #ifdef PERL_MAD
2157     I32 startoff = s - SvPVX(PL_linestr);
2158 #endif
2159     const char *errstr = NULL;
2160
2161     PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2162
2163     for (; isSPACE(*s); s++)    /* leading whitespace */
2164         ;
2165     if (is_STRICT_VERSION(s,&errstr)) {
2166         SV * const ver = newSV(0);
2167         s = (char *)scan_version(s, ver, 0);
2168         version = newSVOP(OP_CONST, 0, ver);
2169     }
2170     else if (*s != ';' && *s != '{' && *s != '}') {
2171         s = SKIPSPACE1(s);
2172         if (*s != ';' && *s != '{' && *s != '}') {
2173             PL_bufptr = s;
2174             if (errstr)
2175                 yyerror(errstr); /* version required */
2176             return s;
2177         }
2178     }
2179
2180 #ifdef PERL_MAD
2181     if (PL_madskills && !version) {
2182         sv_free(PL_nextwhite);  /* let next token collect whitespace */
2183         PL_nextwhite = NULL;
2184         s = SvPVX(PL_linestr) + startoff;
2185     }
2186 #endif
2187     /* NOTE: The parser sees the package name and the VERSION swapped */
2188     start_force(PL_curforce);
2189     NEXTVAL_NEXTTOKE.opval = version;
2190     force_next(WORD);
2191     return s;
2192 }
2193
2194 /*
2195  * S_tokeq
2196  * Finish off a quoted string passed in as an SV: every \\ pair in it is
2197  * collapsed, in place, to a single backslash.  If HINT_NEW_STRING is on,
2198  * the string is then run through new_constant() ("q"), together with pv:
2199  * a temporary copy of the uncollapsed text, or sv itself if unchanged.
2200  */
2201
2202 STATIC SV *
2203 S_tokeq(pTHX_ SV *sv)
2204 {
2205     dVAR;
2206     register char *src;
2207     register char *limit;
2208     register char *dst;
2209     STRLEN len = 0;
2210     SV *pv = sv;
2211
2212     PERL_ARGS_ASSERT_TOKEQ;
2213
2214     if (SvLEN(sv) == 0)
2215         goto done;
2216
2217     src = SvPV_force(sv, len);
2218     if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
2219         goto done;              /* flagged with IV -1: left as it is */
2220
2221     /* nothing to rewrite unless there is at least one backslash */
2222     for (limit = src + len; src < limit && *src != '\\'; src++)
2223         ;
2224     if (src == limit)
2225         goto done;
2226
2227     dst = src;
2228     if (PL_hints & HINT_NEW_STRING)
2229         pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
2230     while (src < limit) {
2231         /* of a doubled backslash, skip the first and copy the second */
2232         if (src[0] == '\\' && src + 1 < limit && src[1] == '\\')
2233             src++;
2234         *dst++ = *src++;
2235     }
2236     *dst = '\0';
2237     SvCUR_set(sv, dst - SvPVX_const(sv));
2238   done:
2239     if (PL_hints & HINT_NEW_STRING)
2240         return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2241     return sv;
2242 }
2243
2244 /*
2245  * Now come three functions related to double-quote context,
2246  * S_sublex_start, S_sublex_push, and S_sublex_done.  They're used when
2247  * converting things like "\u\Lgnat" into ucfirst(lc("gnat")).  They
2248  * interact with PL_lex_state, and create fake ( ... ) argument lists
2249  * to handle functions and concatenation.
2250  * They assume that whoever calls them will be setting up a fake
2251  * join call, because each subthing puts a ',' after it.  This lets
2252  *   "lower \luPpEr"
2253  * become
2254  *  join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
2255  *
2256  * (I'm not sure whether the spurious commas at the end of lcfirst's
2257  * arguments and join's arguments are created or not).
2258  */
2259
2260 /*
2261  * S_sublex_start
2262  * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2263  *
2264  * Returns THING at once, with the finished op in pl_yylval.opval, for
2265  * OP_NULL (the op waiting in PL_lex_op), for OP_CONST and OP_READLINE (a
2266  * new op around tokeq(PL_lex_stuff)), and for an overridden OP_BACKTICK.
2267  * Anything else records the state in PL_sublex_info, sets PL_lex_state to
2268  * LEX_INTERPPUSH and returns PMFUNC if PL_lex_op was set, FUNC otherwise.
2269  */
2270 STATIC I32
2271 S_sublex_start(pTHX)
2272 {
2273     dVAR;
2274     register const I32 op_type = pl_yylval.ival;
2275
2276     switch (op_type) {
2277     case OP_NULL:
2278         pl_yylval.opval = PL_lex_op;
2279         PL_lex_op = NULL;
2280         return THING;
2281
2282     case OP_CONST:
2283     case OP_READLINE: {
2284         SV *sv = tokeq(PL_lex_stuff);
2285
2286         if (SvTYPE(sv) == SVt_PVIV) {
2287             /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2288             STRLEN len;
2289             const char * const p = SvPV_const(sv, len);
2290             SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2291             SvREFCNT_dec(sv);
2292             sv = nsv;
2293         }
2294         pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
2295         PL_lex_stuff = NULL;
2296         if (op_type == OP_READLINE)     /* Allow <FH> // "foo" */
2297             PL_expect = XTERMORDORDOR;
2298         return THING;
2299     }
2300
2301     case OP_BACKTICK:
2302         if (PL_lex_op) {
2303             /* readpipe() was overridden */
2304             cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
2305             pl_yylval.opval = PL_lex_op;
2306             PL_lex_op = NULL;
2307             PL_lex_stuff = NULL;
2308             return THING;
2309         }
2310         break;
2311
2312     default:
2313         break;
2314     }
2315
2316     PL_sublex_info.super_state = PL_lex_state;
2317     PL_sublex_info.sub_inwhat = (U16)op_type;
2318     PL_sublex_info.sub_op = PL_lex_op;
2319     PL_lex_state = LEX_INTERPPUSH;
2320     PL_expect = XTERM;
2321
2322     if (!PL_lex_op)
2323         return FUNC;
2324     pl_yylval.opval = PL_lex_op;
2325     PL_lex_op = NULL;
2326     return PMFUNC;
2327 }
2328
2329 /*
2330  * S_sublex_push
2331  * Opens a new scope (ENTER), saves the lexing state on it and switches the
2332  * lexer to PL_lex_stuff, which becomes PL_linestr.  The scope will be
2333  * ended in S_sublex_done.  Returns a '(', starting the function arguments
2334  * to the uc, lc, etc. found before.  Sets PL_lex_state to LEX_INTERPCONCAT.
2335  */
2336
2337 STATIC I32
2338 S_sublex_push(pTHX)
2339 {
2340     dVAR;
2341     ENTER;
2342     /* go back to the state S_sublex_start stashed, so that is what gets saved */
2343     PL_lex_state = PL_sublex_info.super_state;
2344     SAVEBOOL(PL_lex_dojoin);
2345     SAVEI32(PL_lex_brackets);
2346     SAVEI32(PL_lex_casemods);
2347     SAVEI32(PL_lex_starts);
2348     SAVEI8(PL_lex_state);
2349     SAVEVPTR(PL_lex_inpat);
2350     SAVEI16(PL_lex_inwhat);
2351     SAVECOPLINE(PL_curcop);
2352     SAVEPPTR(PL_bufptr);
2353     SAVEPPTR(PL_bufend);
2354     SAVEPPTR(PL_oldbufptr);
2355     SAVEPPTR(PL_oldoldbufptr);
2356     SAVEPPTR(PL_last_lop);
2357     SAVEPPTR(PL_last_uni);
2358     SAVEPPTR(PL_linestart);
2359     SAVESPTR(PL_linestr);
2360     SAVEGENERICPV(PL_lex_brackstack);
2361     SAVEGENERICPV(PL_lex_casestack);
2362     /* the text collected in PL_lex_stuff becomes the line being lexed */
2363     PL_linestr = PL_lex_stuff;
2364     PL_lex_stuff = NULL;
2365     /* every buffer pointer starts at the beginning of the new text */
2366     PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2367         = SvPVX(PL_linestr);
2368     PL_bufend += SvCUR(PL_linestr);     /* ... except the end pointer */
2369     PL_last_lop = PL_last_uni = NULL;
2370     SAVEFREESV(PL_linestr);
2371     /* start the nested lexer state from scratch, with newly allocated stacks */
2372     PL_lex_dojoin = FALSE;
2373     PL_lex_brackets = 0;
2374     Newx(PL_lex_brackstack, 120, char);
2375     Newx(PL_lex_casestack, 12, char);
2376     PL_lex_casemods = 0;
2377     *PL_lex_casestack = '\0';
2378     PL_lex_starts = 0;
2379     PL_lex_state = LEX_INTERPCONCAT;
2380     CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2381     /* PL_lex_inpat is only set for match, qr and subst */
2382     PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2383     if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2384         PL_lex_inpat = PL_sublex_info.sub_op;
2385     else
2386         PL_lex_inpat = NULL;
2387
2388     return '(';
2389 }
2390
2391 /*
2392  * S_sublex_done
2393  * Restores lexer state after a S_sublex_push (via LEAVE) and returns ')',
2394  * except in the three earlier-return cases commented in the body. */
2395
2396 STATIC I32
2397 S_sublex_done(pTHX)
2398 {
2399     dVAR;
2400     if (!PL_lex_starts++) {     /* PL_lex_starts was still 0: return an empty constant */
2401         SV * const sv = newSVpvs("");
2402         if (SvUTF8(PL_linestr))
2403             SvUTF8_on(sv);
2404         PL_expect = XOPERATOR;
2405         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2406         return THING;
2407     }
2408
2409     if (PL_lex_casemods) {              /* oops, we've got some unbalanced parens */
2410         PL_lex_state = LEX_INTERPCASEMOD;
2411         return yylex();
2412     }
2413
2414     /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2415     if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2416         PL_linestr = PL_lex_repl;       /* lex the replacement text next */
2417         PL_lex_inpat = 0;
2418         PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2419         PL_bufend += SvCUR(PL_linestr);
2420         PL_last_lop = PL_last_uni = NULL;
2421         SAVEFREESV(PL_linestr);
2422         PL_lex_dojoin = FALSE;
2423         PL_lex_brackets = 0;
2424         PL_lex_casemods = 0;
2425         *PL_lex_casestack = '\0';
2426         PL_lex_starts = 0;
2427         if (SvEVALED(PL_lex_repl)) {
2428             PL_lex_state = LEX_INTERPNORMAL;
2429             PL_lex_starts++;
2430             /*  we don't clear PL_lex_repl here, so that we can check later
2431                 whether this is an evalled subst; that means we rely on the
2432                 logic to ensure sublex_done() is called again only via the
2433                 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2434         }
2435         else {
2436             PL_lex_state = LEX_INTERPCONCAT;
2437             PL_lex_repl = NULL;
2438         }
2439         return ',';     /* this branch does not LEAVE: the scope stays open */
2440     }
2441     else {
2442 #ifdef PERL_MAD
2443         if (PL_madskills) {
2444             if (PL_thiswhite) {
2445                 if (!PL_endwhite)
2446                     PL_endwhite = newSVpvs("");
2447                 sv_catsv(PL_endwhite, PL_thiswhite);
2448                 PL_thiswhite = 0;       /* NOTE(review): cleared without sv_free(), unlike PL_skipwhite in the skipspace helpers - confirm ownership */
2449             }
2450             if (PL_thistoken)
2451                 sv_setpvs(PL_thistoken,"");
2452             else
2453                 PL_realtokenstart = -1;
2454         }
2455 #endif
2456         LEAVE;          /* ends the scope opened by ENTER in S_sublex_push */
2457         PL_bufend = SvPVX(PL_linestr);  /* PL_bufend is recomputed from PL_linestr */
2458         PL_bufend += SvCUR(PL_linestr);
2459         PL_expect = XOPERATOR;
2460         PL_sublex_info.sub_inwhat = 0;
2461         return ')';
2462     }
2463 }
2464
2465 /*
2466   scan_const
2467
2468   Extracts a pattern, double-quoted string, or transliteration.  This
2469   is terrifying code.
2470
2471   It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2472   processing a pattern (PL_lex_inpat is true), a transliteration
2473   (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2474
2475   Returns a pointer to the character scanned up to. If this is
2476   advanced from the start pointer supplied (i.e. if anything was
2477   successfully parsed), will leave an OP for the substring scanned
2478   in pl_yylval. Caller must intuit reason for not parsing further
2479   by looking at the next characters herself.
2480
2481   In patterns:
2482     backslashes:
2483       constants: \N{NAME} only
2484       case and quoting: \U \Q \E
2485     stops on @ and $, but not for $ as tail anchor
2486
2487   In transliterations:
2488     characters are VERY literal, except for - not at the start or end
2489     of the string, which indicates a range. If the range is in bytes,
2490     scan_const expands the range to the full set of intermediate
2491     characters. If the range is in utf8, the hyphen is replaced with
2492     a certain range mark which will be handled by pmtrans() in op.c.
2493
2494   In double-quoted strings:
2495     backslashes:
2496       double-quoted style: \r and \n
2497       constants: \x31, etc.
2498       deprecated backrefs: \1 (in substitution replacements)
2499       case and quoting: \U \Q \E
2500     stops on @ and $
2501
2502   scan_const does *not* construct ops to handle interpolated strings.
2503   It stops processing as soon as it finds an embedded $ or @ variable
2504   and leaves it to the caller to work out what's going on.
2505
2506   embedded arrays (whether in pattern or not) could be:
2507       @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2508
2509   $ in double-quoted strings must be the symbol of an embedded scalar.
2510
2511   $ in pattern could be $foo or could be tail anchor.  Assumption:
2512   it's a tail anchor if $ is the last thing in the string, or if it's
2513   followed by one of "()| \r\n\t"
2514
2515   \1 (backreferences) are turned into $1
2516
2517   The structure of the code is
2518       while (there's a character to process) {
2519           handle transliteration ranges
2520           skip regexp comments /(?#comment)/ and codes /(?{code})/
2521           skip #-initiated comments in //x patterns
2522           check for embedded arrays
2523           check for embedded scalars
2524           if (backslash) {
2525               deprecate \1 in substitution replacements
2526               handle string-changing backslashes \l \U \Q \E, etc.
2527               switch (what was escaped) {
2528                   handle \- in a transliteration (becomes a literal -)
2529                   if a pattern and not \N{, go treat as regular character
2530                   handle \132 (octal characters)
2531                   handle \x15 and \x{1234} (hex characters)
2532                   handle \N{name} (named characters, also \N{3,5} in a pattern)
2533                   handle \cV (control characters)
2534                   handle printf-style backslashes (\f, \r, \n, etc)
2535               } (end switch)
2536               continue
2537           } (end if backslash)
2538           handle regular character
2539     } (end while character to read)
2540
2541 */
2542
2543 STATIC char *
2544 S_scan_const(pTHX_ char *start)
2545 {
2546     dVAR;
2547     register char *send = PL_bufend;            /* end of the constant */
2548     SV *sv = newSV(send - start);               /* sv for the constant.  See
2549                                                    note below on sizing. */
2550     register char *s = start;                   /* start of the constant */
2551     register char *d = SvPVX(sv);               /* destination for copies */
2552     bool dorange = FALSE;                       /* are we in a translit range? */
2553     bool didrange = FALSE;                      /* did we just finish a range? */
2554     I32  has_utf8 = FALSE;                      /* Output constant is UTF8 */
2555     I32  this_utf8 = UTF;                       /* Is the source string assumed
2556                                                    to be UTF8?  But, this can
2557                                                    show as true when the source
2558                                                    isn't utf8, as for example
2559                                                    when it is entirely composed
2560                                                    of hex constants */
2561
2562     /* Note on sizing:  The scanned constant is placed into sv, which is
2563      * initialized by newSV() assuming one byte of output for every byte of
2564      * input.  This routine expects newSV() to allocate an extra byte for a
2565      * trailing NUL, which this routine will append if it gets to the end of
2566      * the input.  There may be more bytes of input than output (eg., \N{LATIN
2567      * CAPITAL LETTER A}), or more output than input if the constant ends up
2568      * recoded to utf8, but each time a construct is found that might increase
2569      * the needed size, SvGROW() is called.  Its size parameter each time is
2570      * based on the best guess estimate at the time, namely the length used so
2571      * far, plus the length the current construct will occupy, plus room for
2572      * the trailing NUL, plus one byte for every input byte still unscanned */
2573
2574     UV uv;
2575 #ifdef EBCDIC
2576     UV literal_endpoint = 0;
2577     bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
2578 #endif
2579
2580     PERL_ARGS_ASSERT_SCAN_CONST;
2581
2582     if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2583         /* If we are doing a trans and we know we want UTF8 set expectation */
2584         has_utf8   = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2585         this_utf8  = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2586     }
2587
2588
2589     while (s < send || dorange) {
2590
2591         /* get transliterations out of the way (they're most literal) */
2592         if (PL_lex_inwhat == OP_TRANS) {
2593             /* expand a range A-Z to the full set of characters.  AIE! */
2594             if (dorange) {
2595                 I32 i;                          /* current expanded character */
2596                 I32 min;                        /* first character in range */
2597                 I32 max;                        /* last character in range */
2598
2599 #ifdef EBCDIC
2600                 UV uvmax = 0;
2601 #endif
2602
2603                 if (has_utf8
2604 #ifdef EBCDIC
2605                     && !native_range
2606 #endif
2607                     ) {
2608                     char * const c = (char*)utf8_hop((U8*)d, -1);
2609                     char *e = d++;
2610                     while (e-- > c)
2611                         *(e + 1) = *e;
2612                     *c = (char)UTF_TO_NATIVE(0xff);
2613                     /* mark the range as done, and continue */
2614                     dorange = FALSE;
2615                     didrange = TRUE;
2616                     continue;
2617                 }
2618
2619                 i = d - SvPVX_const(sv);                /* remember current offset */
2620 #ifdef EBCDIC
2621                 SvGROW(sv,
2622                        SvLEN(sv) + (has_utf8 ?
2623                                     (512 - UTF_CONTINUATION_MARK +
2624                                      UNISKIP(0x100))
2625                                     : 256));
2626                 /* How many two-byte within 0..255: 128 in UTF-8,
2627                  * 96 in UTF-8-mod. */
2628 #else
2629                 SvGROW(sv, SvLEN(sv) + 256);    /* never more than 256 chars in a range */
2630 #endif
2631                 d = SvPVX(sv) + i;              /* refresh d after realloc */
2632 #ifdef EBCDIC
2633                 if (has_utf8) {
2634                     int j;
2635                     for (j = 0; j <= 1; j++) {
2636                         char * const c = (char*)utf8_hop((U8*)d, -1);
2637                         const UV uv    = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2638                         if (j)
2639                             min = (U8)uv;
2640                         else if (uv < 256)
2641                             max = (U8)uv;
2642                         else {
2643                             max = (U8)0xff; /* only to \xff */
2644                             uvmax = uv; /* \x{100} to uvmax */
2645                         }
2646                         d = c; /* eat endpoint chars */
2647                      }
2648                 }
2649                else {
2650 #endif
2651                    d -= 2;              /* eat the first char and the - */
2652                    min = (U8)*d;        /* first char in range */
2653                    max = (U8)d[1];      /* last char in range  */
2654 #ifdef EBCDIC
2655                }
2656 #endif
2657
2658                 if (min > max) {
2659                     Perl_croak(aTHX_
2660                                "Invalid range \"%c-%c\" in transliteration operator",
2661                                (char)min, (char)max);
2662                 }
2663
2664 #ifdef EBCDIC
2665                 if (literal_endpoint == 2 &&
2666                     ((isLOWER(min) && isLOWER(max)) ||
2667                      (isUPPER(min) && isUPPER(max)))) {
2668                     if (isLOWER(min)) {
2669                         for (i = min; i <= max; i++)
2670                             if (isLOWER(i))
2671                                 *d++ = NATIVE_TO_NEED(has_utf8,i);
2672                     } else {
2673                         for (i = min; i <= max; i++)
2674                             if (isUPPER(i))
2675                                 *d++ = NATIVE_TO_NEED(has_utf8,i);
2676                     }
2677                 }
2678                 else
2679 #endif
2680                     for (i = min; i <= max; i++)
2681 #ifdef EBCDIC
2682                         if (has_utf8) {
2683                             const U8 ch = (U8)NATIVE_TO_UTF(i);
2684                             if (UNI_IS_INVARIANT(ch))
2685                                 *d++ = (U8)i;
2686                             else {
2687                                 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2688                                 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2689                             }
2690                         }
2691                         else
2692 #endif
2693                             *d++ = (char)i;
2694
2695 #ifdef EBCDIC
2696                 if (uvmax) {
2697                     d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2698                     if (uvmax > 0x101)
2699                         *d++ = (char)UTF_TO_NATIVE(0xff);
2700                     if (uvmax > 0x100)
2701                         d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2702                 }
2703 #endif
2704
2705                 /* mark the range as done, and continue */
2706                 dorange = FALSE;
2707                 didrange = TRUE;
2708 #ifdef EBCDIC
2709                 literal_endpoint = 0;
2710 #endif
2711                 continue;
2712             }
2713
2714             /* range begins (ignore - as first or last char) */
2715             else if (*s == '-' && s+1 < send  && s != start) {
2716                 if (didrange) {
2717                     Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
2718                 }
2719                 if (has_utf8
2720 #ifdef EBCDIC
2721                     && !native_range
2722 #endif
2723                     ) {
2724                     *d++ = (char)UTF_TO_NATIVE(0xff);   /* use illegal utf8 byte--see pmtrans */
2725                     s++;
2726                     continue;
2727                 }
2728                 dorange = TRUE;
2729                 s++;
2730             }
2731             else {
2732                 didrange = FALSE;
2733 #ifdef EBCDIC
2734                 literal_endpoint = 0;
2735                 native_range = TRUE;
2736 #endif
2737             }
2738         }
2739
2740         /* if we get here, we're not doing a transliteration */
2741
2742         /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2743            except for the last char, which will be done separately. */
2744         else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
2745             if (s[2] == '#') {
2746                 while (s+1 < send && *s != ')')
2747                     *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2748             }
2749             else if (s[2] == '{' /* This should match regcomp.c */
2750                     || (s[2] == '?' && s[3] == '{'))
2751             {
2752                 I32 count = 1;
2753                 char *regparse = s + (s[2] == '{' ? 3 : 4);
2754                 char c;
2755
2756                 while (count && (c = *regparse)) {
2757                     if (c == '\\' && regparse[1])
2758                         regparse++;
2759                     else if (c == '{')
2760                         count++;
2761                     else if (c == '}')
2762                         count--;
2763                     regparse++;
2764                 }
2765                 if (*regparse != ')')
2766                     regparse--;         /* Leave one char for continuation. */
2767                 while (s < regparse)
2768                     *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2769             }
2770         }
2771
2772         /* likewise skip #-initiated comments in //x patterns */
2773         else if (*s == '#' && PL_lex_inpat &&
2774           ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
2775             while (s+1 < send && *s != '\n')
2776                 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2777         }
2778
2779         /* check for embedded arrays
2780            (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
2781            */
2782         else if (*s == '@' && s[1]) {
2783             if (isALNUM_lazy_if(s+1,UTF))
2784                 break;
2785             if (strchr(":'{$", s[1]))
2786                 break;
2787             if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2788                 break; /* in regexp, neither @+ nor @- are interpolated */
2789         }
2790
2791         /* check for embedded scalars.  only stop if we're sure it's a
2792            variable.
2793         */
2794         else if (*s == '$') {
2795             if (!PL_lex_inpat)  /* not a regexp, so $ must be var */
2796                 break;
2797             if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
2798                 if (s[1] == '\\') {
2799                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2800                                    "Possible unintended interpolation of $\\ in regex");
2801                 }
2802                 break;          /* in regexp, $ might be tail anchor */
2803             }
2804         }
2805
2806         /* End of else if chain - OP_TRANS rejoin rest */
2807
2808         /* backslashes */
2809         if (*s == '\\' && s+1 < send) {
2810             char* e;    /* Can be used for ending '}', etc. */
2811
2812             s++;
2813
2814             /* deprecate \1 in strings and substitution replacements */
2815             if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
2816                 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
2817             {
2818                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
2819                 *--s = '$';
2820                 break;
2821             }
2822
2823             /* string-change backslash escapes */
2824             if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
2825                 --s;
2826                 break;
2827             }
2828             /* In a pattern, process \N, but skip any other backslash escapes.
2829              * This is because we don't want to translate an escape sequence
2830              * into a meta symbol and have the regex compiler use the meta
2831              * symbol meaning, e.g. \x{2E} would be confused with a dot.  But
2832              * in spite of this, we do have to process \N here while the proper
2833              * charnames handler is in scope.  See bugs #56444 and #62056.
2834              * There is a complication because \N in a pattern may also stand
2835              * for 'match a non-nl', and not mean a charname, in which case its
2836              * processing should be deferred to the regex compiler.  To be a
2837              * charname it must be followed immediately by a '{', and not look
2838              * like \N followed by a curly quantifier, i.e., not something like
2839              * \N{3,}.  regcurly returns a boolean indicating if it is a legal
2840              * quantifier */
2841             else if (PL_lex_inpat
2842                     && (*s != 'N'
2843                         || s[1] != '{'
2844                         || regcurly(s + 1)))
2845             {
2846                 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2847                 goto default_action;
2848             }
2849
2850             switch (*s) {
2851
2852             /* quoted - in transliterations */
2853             case '-':
2854                 if (PL_lex_inwhat == OP_TRANS) {
2855                     *d++ = *s++;
2856                     continue;
2857                 }
2858                 /* FALL THROUGH */
2859             default:
2860                 {
2861                     if ((isALPHA(*s) || isDIGIT(*s)))
2862                         Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
2863                                        "Unrecognized escape \\%c passed through",
2864                                        *s);
2865                     /* default action is to copy the quoted character */
2866                     goto default_action;
2867                 }
2868
2869             /* eg. \132 indicates the octal constant 0132 */
2870             case '0': case '1': case '2': case '3':
2871             case '4': case '5': case '6': case '7':
2872                 {
2873                     I32 flags = 0;
2874                     STRLEN len = 3;
2875                     uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
2876                     s += len;
2877                 }
2878                 goto NUM_ESCAPE_INSERT;
2879
2880             /* eg. \x24 indicates the hex constant 0x24 */
2881             case 'x':
2882                 ++s;
2883                 if (*s == '{') {
2884                     char* const e = strchr(s, '}');
2885                     I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2886                       PERL_SCAN_DISALLOW_PREFIX;
2887                     STRLEN len;
2888
2889                     ++s;
2890                     if (!e) {
2891                         yyerror("Missing right brace on \\x{}");
2892                         continue;
2893                     }
2894                     len = e - s;
2895                     uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
2896                     s = e + 1;
2897                 }
2898                 else {
2899                     {
2900                         STRLEN len = 2;
2901                         I32 flags = PERL_SCAN_DISALLOW_PREFIX;
2902                         uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
2903                         s += len;
2904                     }
2905                 }
2906
2907               NUM_ESCAPE_INSERT:
2908                 /* Insert oct or hex escaped character.  There will always be
2909                  * enough room in sv since such escapes will be longer than any
2910                  * UTF-8 sequence they can end up as, except if they force us
2911                  * to recode the rest of the string into utf8 */
2912
2913                 /* Here uv is the ordinal of the next character being added in
2914                  * unicode (converted from native). */
2915                 if (!UNI_IS_INVARIANT(uv)) {
2916                     if (!has_utf8 && uv > 255) {
2917                         /* Might need to recode whatever we have accumulated so
2918                          * far if it contains any chars variant in utf8 or
2919                          * utf-ebcdic. */
2920
2921                         SvCUR_set(sv, d - SvPVX_const(sv));
2922                         SvPOK_on(sv);
2923                         *d = '\0';
2924                         /* See Note on sizing above.  */
2925                         sv_utf8_upgrade_flags_grow(sv,
2926                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
2927                                         UNISKIP(uv) + (STRLEN)(send - s) + 1);
2928                         d = SvPVX(sv) + SvCUR(sv);
2929                         has_utf8 = TRUE;
2930                     }
2931
2932                     if (has_utf8) {
2933                         d = (char*)uvuni_to_utf8((U8*)d, uv);
2934                         if (PL_lex_inwhat == OP_TRANS &&
2935                             PL_sublex_info.sub_op) {
2936                             PL_sublex_info.sub_op->op_private |=
2937                                 (PL_lex_repl ? OPpTRANS_FROM_UTF
2938                                              : OPpTRANS_TO_UTF);
2939                         }
2940 #ifdef EBCDIC
2941                         if (uv > 255 && !dorange)
2942                             native_range = FALSE;
2943 #endif
2944                     }
2945                     else {
2946                         *d++ = (char)uv;
2947                     }
2948                 }
2949                 else {
2950                     *d++ = (char) uv;
2951                 }
2952                 continue;
2953
2954             case 'N':
2955                 /* In a non-pattern \N must be a named character, like \N{LATIN
2956                  * SMALL LETTER A} or \N{U+0041}.  For patterns, it also can
2957                  * mean to match a non-newline.  For non-patterns, named
2958                  * characters are converted to their string equivalents. In
2959                  * patterns, named characters are not converted to their
2960                  * ultimate forms for the same reasons that other escapes
2961                  * aren't.  Instead, they are converted to the \N{U+...} form
2962                  * to get the value from the charnames that is in effect right
2963                  * now, while preserving the fact that it was a named character
2964                  * so that the regex compiler knows this */
2965
2966                 /* This section of code doesn't generally use the
2967                  * NATIVE_TO_NEED() macro to transform the input.  I (khw) did
2968                  * a close examination of this macro and determined it is a
2969                  * no-op except on utfebcdic variant characters.  Every
2970                  * character generated by this that would normally need to be
2971                  * enclosed by this macro is invariant, so the macro is not
2972                  * needed, and would complicate use of copy(). There are other
2973                  * parts of this file where the macro is used inconsistently,
2974                  * but are saved by it being a no-op */
2975
2976                 /* The structure of this section of code (besides checking for
2977                  * errors and upgrading to utf8) is:
2978                  *  Further disambiguate between the two meanings of \N, and if
2979                  *      not a charname, go process it elsewhere
2980                  *  If of form \N{U+...}, pass it through if a pattern;
2981                  *      otherwise convert to utf8
2982                  *  Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
2983                  *  pattern; otherwise convert to utf8 */
2984
2985                 /* Here, s points to the 'N'; the test below is guaranteed to
2986                  * succeed if we are being called on a pattern as we already
2987                  * know from a test above that the next character is a '{'.
2988                  * On a non-pattern \N must mean 'named sequence', which
2989                  * requires braces */
2990                 s++;
2991                 if (*s != '{') {
2992                     yyerror("Missing braces on \\N{}");
2993                     continue;
2994                 }
2995                 s++;
2996
2997                 /* If there is no matching '}', it is an error. */
2998                 if (! (e = strchr(s, '}'))) {
2999                     if (! PL_lex_inpat) {
3000                         yyerror("Missing right brace on \\N{}");
3001                     } else {
3002                         yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
3003                     }
3004                     continue;
3005                 }
3006
3007                 /* Here it looks like a named character */
3008
3009                 if (PL_lex_inpat) {
3010
3011                     /* XXX This block is temporary code.  \N{} implies that the
3012                      * pattern is to have Unicode semantics, and therefore
3013                      * currently has to be encoded in utf8.  By putting it in
3014                      * utf8 now, we save a whole pass in the regular expression
3015                      * compiler.  Once that code is changed so Unicode
3016                      * semantics doesn't necessarily have to be in utf8, this
3017                      * block should be removed */
3018                     if (!has_utf8) {
3019                         SvCUR_set(sv, d - SvPVX_const(sv));
3020                         SvPOK_on(sv);
3021                         *d = '\0';
3022                         /* See Note on sizing above.  */
3023                         sv_utf8_upgrade_flags_grow(sv,
3024                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3025                                         /* 5 = '\N{' + cur char + NUL */
3026                                         (STRLEN)(send - s) + 5);
3027                         d = SvPVX(sv) + SvCUR(sv);
3028                         has_utf8 = TRUE;
3029                     }
3030                 }
3031
3032                 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3033                     I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3034                                 | PERL_SCAN_DISALLOW_PREFIX;
3035                     STRLEN len;
3036
3037                     /* For \N{U+...}, the '...' is a unicode value even on
3038                      * EBCDIC machines */
3039                     s += 2;         /* Skip to next char after the 'U+' */
3040                     len = e - s;
3041                     uv = grok_hex(s, &len, &flags, NULL);
3042                     if (len == 0 || len != (STRLEN)(e - s)) {
3043                         yyerror("Invalid hexadecimal number in \\N{U+...}");
3044                         s = e + 1;
3045                         continue;
3046                     }
3047
3048                     if (PL_lex_inpat) {
3049
3050                         /* Pass through to the regex compiler unchanged.  The
3051                          * reason we evaluated the number above is to make sure
3052                          * there wasn't a syntax error. */
3053                         s -= 5;     /* Include the '\N{U+' */
3054                         Copy(s, d, e - s + 1, char);    /* 1 = include the } */
3055                         d += e - s + 1;
3056                     }
3057                     else {  /* Not a pattern: convert the hex to string */
3058
3059                          /* If destination is not in utf8, unconditionally
3060                           * recode it to be so.  This is because \N{} implies
3061                           * Unicode semantics, and scalars have to be in utf8
3062                           * to guarantee those semantics */
3063                         if (! has_utf8) {
3064                             SvCUR_set(sv, d - SvPVX_const(sv));
3065                             SvPOK_on(sv);
3066                             *d = '\0';
3067                             /* See Note on sizing above.  */
3068                             sv_utf8_upgrade_flags_grow(
3069                                         sv,
3070                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3071                                         UNISKIP(uv) + (STRLEN)(send - e) + 1);
3072                             d = SvPVX(sv) + SvCUR(sv);
3073                             has_utf8 = TRUE;
3074                         }
3075
3076                         /* Add the string to the output */
3077                         if (UNI_IS_INVARIANT(uv)) {
3078                             *d++ = (char) uv;
3079                         }
3080                         else d = (char*)uvuni_to_utf8((U8*)d, uv);
3081                     }
3082                 }
3083                 else { /* Here is \N{NAME} but not \N{U+...}. */
3084
3085                     SV *res;            /* result from charnames */
3086                     const char *str;    /* the string in 'res' */
3087                     STRLEN len;         /* its length */
3088
3089                     /* Get the value for NAME */
3090                     res = newSVpvn(s, e - s);
3091                     res = new_constant( NULL, 0, "charnames",
3092                                         /* includes all of: \N{...} */
3093                                         res, NULL, s - 3, e - s + 4 );
3094
3095                     /* Most likely res will be in utf8 already since the
3096                      * standard charnames uses pack U, but a custom translator
3097                      * can leave it otherwise, so make sure.  XXX This can be
3098                      * revisited to not have charnames use utf8 for characters
3099                      * that don't need it when regexes don't have to be in utf8
3100                      * for Unicode semantics.  If doing so, remember EBCDIC */
3101                     sv_utf8_upgrade(res);
3102                     str = SvPV_const(res, len);
3103
3104                     /* Don't accept malformed input */
3105                     if (! is_utf8_string((U8 *) str, len)) {
3106                         yyerror("Malformed UTF-8 returned by \\N");
3107                     }
3108                     else if (PL_lex_inpat) {
3109
3110                         if (! len) { /* The name resolved to an empty string */
3111                             Copy("\\N{}", d, 4, char);
3112                             d += 4;
3113                         }
3114                         else {
3115                             /* In order to not lose information for the regex
3116                             * compiler, pass the result in the specially made
3117                             * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3118                             * the code points in hex of each character
3119                             * returned by charnames */
3120
3121                             const char *str_end = str + len;
3122                             STRLEN char_length;     /* cur char's byte length */
3123                             STRLEN output_length;   /* and the number of bytes
3124                                                        after this is translated
3125                                                        into hex digits */
3126                             const STRLEN off = d - SvPVX_const(sv);
3127
3128                             /* 2 hex per byte; 2 chars for '\N'; 2 chars for
3129                              * max('U+', '.'); and 1 for NUL */
3130                             char hex_string[2 * UTF8_MAXBYTES + 5];
3131
3132                             /* Get the first character of the result. */
3133                             U32 uv = utf8n_to_uvuni((U8 *) str,
3134                                                     len,
3135                                                     &char_length,
3136                                                     UTF8_ALLOW_ANYUV);
3137
3138                             /* The call to is_utf8_string() above hopefully
3139                              * guarantees that there won't be an error.  But
3140                              * it's easy here to make sure.  The function just
3141                              * above warns and returns 0 if invalid utf8, but
3142                              * it can also return 0 if the input is validly a
3143                              * NUL. Disambiguate */
3144                             if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3145                                 uv = UNICODE_REPLACEMENT;
3146                             }
3147
3148                             /* Convert first code point to hex, including the
3149                              * boiler plate before it */
3150                             sprintf(hex_string, "\\N{U+%X", (unsigned int) uv);
3151                             output_length = strlen(hex_string);
3152
3153                             /* Make sure there is enough space to hold it */
3154                             d = off + SvGROW(sv, off
3155                                                  + output_length
3156                                                  + (STRLEN)(send - e)
3157                                                  + 2);  /* '}' + NUL */
3158                             /* And output it */
3159                             Copy(hex_string, d, output_length, char);
3160                             d += output_length;
3161
3162                             /* For each subsequent character, append dot and
3163                              * its ordinal in hex */
3164                             while ((str += char_length) < str_end) {
3165                                 const STRLEN off = d - SvPVX_const(sv);
3166                                 U32 uv = utf8n_to_uvuni((U8 *) str,
3167                                                         str_end - str,
3168                                                         &char_length,
3169                                                         UTF8_ALLOW_ANYUV);
3170                                 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3171                                     uv = UNICODE_REPLACEMENT;
3172                                 }
3173
3174                                 sprintf(hex_string, ".%X", (unsigned int) uv);
3175                                 output_length = strlen(hex_string);
3176
3177                                 d = off + SvGROW(sv, off
3178                                                      + output_length
3179                                                      + (STRLEN)(send - e)
3180                                                      + 2);      /* '}' +  NUL */
3181                                 Copy(hex_string, d, output_length, char);
3182                                 d += output_length;
3183                             }
3184
3185                             *d++ = '}'; /* Done.  Add the trailing brace */
3186                         }
3187                     }
3188                     else { /* Here, not in a pattern.  Convert the name to a
3189                             * string. */
3190
3191                          /* If destination is not in utf8, unconditionally
3192                           * recode it to be so.  This is because \N{} implies
3193                           * Unicode semantics, and scalars have to be in utf8
3194                           * to guarantee those semantics */
3195                         if (! has_utf8) {
3196                             SvCUR_set(sv, d - SvPVX_const(sv));
3197                             SvPOK_on(sv);
3198                             *d = '\0';
3199                             /* See Note on sizing above.  */
3200                             sv_utf8_upgrade_flags_grow(sv,
3201                                                 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3202                                                 len + (STRLEN)(send - s) + 1);
3203                             d = SvPVX(sv) + SvCUR(sv);
3204                             has_utf8 = TRUE;
3205                         } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3206
3207                             /* See Note on sizing above.  (NOTE: SvCUR() is not
3208                              * set correctly here). */
3209                             const STRLEN off = d - SvPVX_const(sv);
3210                             d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3211                         }
3212                         Copy(str, d, len, char);
3213                         d += len;
3214                     }
3215                     SvREFCNT_dec(res);
3216
3217                     /* Deprecate non-approved name syntax */
3218                     if (ckWARN_d(WARN_DEPRECATED)) {
3219                         bool problematic = FALSE;
3220                         char* i = s;
3221
3222                         /* For non-ut8 input, look to see that the first
3223                          * character is an alpha, then loop through the rest
3224                          * checking that each is a continuation */
3225                         if (! this_utf8) {
3226                             if (! isALPHAU(*i)) problematic = TRUE;
3227                             else for (i = s + 1; i < e; i++) {
3228                                 if (isCHARNAME_CONT(*i)) continue;
3229                                 problematic = TRUE;
3230                                 break;
3231                             }
3232                         }
3233                         else {
3234                             /* Similarly for utf8.  For invariants can check
3235                              * directly.  We accept anything above the latin1
3236                              * range because it is immaterial to Perl if it is
3237                              * correct or not, and is expensive to check.  But
3238                              * it is fairly easy in the latin1 range to convert
3239                              * the variants into a single character and check
3240                              * those */
3241                             if (UTF8_IS_INVARIANT(*i)) {
3242                                 if (! isALPHAU(*i)) problematic = TRUE;
3243                             } else if (UTF8_IS_DOWNGRADEABLE_START(*i)) {
3244                                 if (! isALPHAU(UNI_TO_NATIVE(UTF8_ACCUMULATE(*i,
3245                                                                             *(i+1)))))
3246                                 {
3247                                     problematic = TRUE;
3248                                 }
3249                             }
3250                             if (! problematic) for (i = s + UTF8SKIP(s);
3251                                                     i < e;
3252                                                     i+= UTF8SKIP(i))
3253                             {
3254                                 if (UTF8_IS_INVARIANT(*i)) {
3255                                     if (isCHARNAME_CONT(*i)) continue;
3256                                 } else if (! UTF8_IS_DOWNGRADEABLE_START(*i)) {
3257                                     continue;
3258                                 } else if (isCHARNAME_CONT(
3259                                             UNI_TO_NATIVE(
3260                                             UTF8_ACCUMULATE(*i, *(i+1)))))
3261                                 {
3262                                     continue;
3263                                 }
3264                                 problematic = TRUE;
3265                                 break;
3266                             }
3267                         }
3268                         if (problematic) {
3269                             /* The e-i passed to the final %.*s makes sure that
3270                              * should the trailing NUL be missing that this
3271                              * print won't run off the end of the string */
3272                             Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
3273                                         "Deprecated character in \\N{...}; marked by <-- HERE  in \\N{%.*s<-- HERE %.*s",
3274                                         (int)(i - s + 1), s, (int)(e - i), i + 1);
3275                         }
3276                     }
3277                 } /* End \N{NAME} */
3278 #ifdef EBCDIC
3279                 if (!dorange)
3280                     native_range = FALSE; /* \N{} is defined to be Unicode */
3281 #endif
3282                 s = e + 1;  /* Point to just after the '}' */
3283                 continue;
3284
3285             /* \c is a control character */
3286             case 'c':
3287                 s++;
3288                 if (s < send) {
3289                     *d++ = grok_bslash_c(*s++, 1);
3290                 }
3291                 else {
3292                     yyerror("Missing control char name in \\c");
3293                 }
3294                 continue;
3295
3296             /* printf-style backslashes, formfeeds, newlines, etc */
3297             case 'b':
3298                 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
3299                 break;
3300             case 'n':
3301                 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
3302                 break;
3303             case 'r':
3304                 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
3305                 break;
3306             case 'f':
3307                 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
3308                 break;
3309             case 't':
3310                 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
3311                 break;
3312             case 'e':
3313                 *d++ = ASCII_TO_NEED(has_utf8,'\033');
3314                 break;
3315             case 'a':
3316                 *d++ = ASCII_TO_NEED(has_utf8,'\007');
3317                 break;
3318             } /* end switch */
3319
3320             s++;
3321             continue;
3322         } /* end if (backslash) */
3323 #ifdef EBCDIC
3324         else
3325             literal_endpoint++;
3326 #endif
3327
3328     default_action:
3329         /* If we started with encoded form, or already know we want it,
3330            then encode the next character */
3331         if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
3332             STRLEN len  = 1;
3333
3334
3335             /* One might think that it is wasted effort in the case of the
3336              * source being utf8 (this_utf8 == TRUE) to take the next character
3337              * in the source, convert it to an unsigned value, and then convert
3338              * it back again.  But the source has not been validated here.  The
3339              * routine that does the conversion checks for errors like
3340              * malformed utf8 */
3341
3342             const UV nextuv   = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3343             const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
3344             if (!has_utf8) {
3345                 SvCUR_set(sv, d - SvPVX_const(sv));
3346                 SvPOK_on(sv);
3347                 *d = '\0';
3348                 /* See Note on sizing above.  */
3349                 sv_utf8_upgrade_flags_grow(sv,
3350                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3351                                         need + (STRLEN)(send - s) + 1);
3352                 d = SvPVX(sv) + SvCUR(sv);
3353                 has_utf8 = TRUE;
3354             } else if (need > len) {
3355                 /* encoded value larger than old, may need extra space (NOTE:
3356                  * SvCUR() is not set correctly here).   See Note on sizing
3357                  * above.  */
3358                 const STRLEN off = d - SvPVX_const(sv);
3359                 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
3360             }
3361             s += len;
3362
3363             d = (char*)uvchr_to_utf8((U8*)d, nextuv);
3364 #ifdef EBCDIC
3365             if (uv > 255 && !dorange)
3366                 native_range = FALSE;
3367 #endif
3368         }
3369         else {
3370             *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3371         }
3372     } /* while loop to process each character */
3373
3374     /* terminate the string and set up the sv */
3375     *d = '\0';
3376     SvCUR_set(sv, d - SvPVX_const(sv));
3377     if (SvCUR(sv) >= SvLEN(sv))
3378         Perl_croak(aTHX_ "panic: constant overflowed allocated space");
3379
3380     SvPOK_on(sv);
3381     if (PL_encoding && !has_utf8) {
3382         sv_recode_to_utf8(sv, PL_encoding);
3383         if (SvUTF8(sv))
3384             has_utf8 = TRUE;
3385     }
3386     if (has_utf8) {
3387         SvUTF8_on(sv);
3388         if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3389             PL_sublex_info.sub_op->op_private |=
3390                     (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3391         }
3392     }
3393
3394     /* shrink the sv if we allocated more than we used */
3395     if (SvCUR(sv) + 5 < SvLEN(sv)) {
3396         SvPV_shrink_to_cur(sv);
3397     }
3398
3399     /* return the substring (via pl_yylval) only if we parsed anything */
3400     if (s > PL_bufptr) {
3401         if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
3402             const char *const key = PL_lex_inpat ? "qr" : "q";
3403             const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3404             const char *type;
3405             STRLEN typelen;
3406
3407             if (PL_lex_inwhat == OP_TRANS) {
3408                 type = "tr";
3409                 typelen = 2;
3410             } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3411                 type = "s";
3412                 typelen = 1;
3413             } else  {
3414                 type = "qq";
3415                 typelen = 2;
3416             }
3417
3418             sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3419                                 type, typelen);
3420         }
3421         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3422     } else
3423         SvREFCNT_dec(sv);
3424     return s;
3425 }
3426
3427 /* S_intuit_more
3428  * Returns TRUE if there's more to the expression (e.g., a subscript),
3429  * FALSE otherwise.
3430  *
3431  * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3432  *
3433  * ->[ and ->{ return TRUE
3434  * { and [ outside a pattern are always subscripts, so return TRUE
3435  * if we're outside a pattern and it's not { or [, then return FALSE
3436  * if we're in a pattern and the first char is a {
3437  *   {4,5} (any digits around the comma) returns FALSE
3438  * if we're in a pattern and the first char is a [
3439  *   [] returns FALSE
3440  *   [SOMETHING] has a funky algorithm to decide whether it's a
3441  *      character class or not.  It has to deal with things like
3442  *      /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3443  * anything else returns TRUE
3444  */
3445
3446 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
3447
3448 STATIC int
3449 S_intuit_more(pTHX_ register char *s)
3450 {
3451     dVAR;
3452
3453     PERL_ARGS_ASSERT_INTUIT_MORE;
3454
3455     if (PL_lex_brackets)
3456         return TRUE;
3457     if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3458         return TRUE;
3459     if (*s != '{' && *s != '[')
3460         return FALSE;
3461     if (!PL_lex_inpat)
3462         return TRUE;
3463
3464     /* In a pattern, so maybe we have {n,m}. */
3465     if (*s == '{') {
3466         s++;
3467         if (!isDIGIT(*s))
3468             return TRUE;
3469         while (isDIGIT(*s))
3470             s++;
3471         if (*s == ',')
3472             s++;
3473         while (isDIGIT(*s))
3474             s++;
3475         if (*s == '}')
3476             return FALSE;
3477         return TRUE;
3478
3479     }
3480
3481     /* On the other hand, maybe we have a character class */
3482
3483     s++;
3484     if (*s == ']' || *s == '^')
3485         return FALSE;
3486     else {
3487         /* this is terrifying, and it works */
3488         int weight = 2;         /* let's weigh the evidence */
3489         char seen[256];
3490         unsigned char un_char = 255, last_un_char;
3491         const char * const send = strchr(s,']');
3492         char tmpbuf[sizeof PL_tokenbuf * 4];
3493
3494         if (!send)              /* has to be an expression */
3495             return TRUE;
3496
3497         Zero(seen,256,char);
3498         if (*s == '$')
3499             weight -= 3;
3500         else if (isDIGIT(*s)) {
3501             if (s[1] != ']') {
3502                 if (isDIGIT(s[1]) && s[2] == ']')
3503                     weight -= 10;
3504             }
3505             else
3506                 weight -= 100;
3507         }
3508         for (; s < send; s++) {
3509             last_un_char = un_char;
3510             un_char = (unsigned char)*s;
3511             switch (*s) {
3512             case '@':
3513             case '&':
3514             case '$':
3515                 weight -= seen[un_char] * 10;
3516                 if (isALNUM_lazy_if(s+1,UTF)) {
3517                     int len;
3518                     scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
3519                     len = (int)strlen(tmpbuf);
3520                     if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
3521                         weight -= 100;
3522                     else
3523                         weight -= 10;
3524                 }
3525                 else if (*s == '$' && s[1] &&
3526                   strchr("[#!%*<>()-=",s[1])) {
3527                     if (/*{*/ strchr("])} =",s[2]))
3528                         weight -= 10;
3529                     else
3530                         weight -= 1;
3531                 }
3532                 break;
3533             case '\\':
3534                 un_char = 254;
3535                 if (s[1]) {
3536                     if (strchr("wds]",s[1]))
3537                         weight += 100;
3538                     else if (seen[(U8)'\''] || seen[(U8)'"'])
3539                         weight += 1;
3540                     else if (strchr("rnftbxcav",s[1]))
3541                         weight += 40;
3542                     else if (isDIGIT(s[1])) {
3543                         weight += 40;
3544                         while (s[1] && isDIGIT(s[1]))
3545                             s++;
3546                     }
3547                 }
3548                 else
3549                     weight += 100;
3550                 break;
3551             case '-':
3552                 if (s[1] == '\\')
3553                     weight += 50;
3554                 if (strchr("aA01! ",last_un_char))
3555                     weight += 30;
3556                 if (strchr("zZ79~",s[1]))
3557                     weight += 30;
3558                 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3559                     weight -= 5;        /* cope with negative subscript */
3560                 break;
3561             default:
3562                 if (!isALNUM(last_un_char)
3563                     && !(last_un_char == '$' || last_un_char == '@'
3564                          || last_un_char == '&')
3565                     && isALPHA(*s) && s[1] && isALPHA(s[1])) {
3566                     char *d = tmpbuf;
3567                     while (isALPHA(*s))
3568                         *d++ = *s++;
3569                     *d = '\0';
3570                     if (keyword(tmpbuf, d - tmpbuf, 0))
3571                         weight -= 150;
3572                 }
3573                 if (un_char == last_un_char + 1)
3574                     weight += 5;
3575                 weight -= seen[un_char];
3576                 break;
3577             }
3578             seen[un_char]++;
3579         }
3580         if (weight >= 0)        /* probably a character class */
3581             return FALSE;
3582     }
3583
3584     return TRUE;
3585 }
3586
3587 /*
3588  * S_intuit_method
3589  *
3590  * Does all the checking to disambiguate
3591  *   foo bar
3592  * between foo(bar) and bar->foo.  Returns 0 if not a method, otherwise
3593  * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3594  *
3595  * First argument is the stuff after the first token, e.g. "bar".
3596  *
3597  * Not a method if bar is a filehandle.
3598  * Not a method if foo is a subroutine prototyped to take a filehandle.
3599  * Not a method if it's really "Foo $bar"
3600  * Method if it's "foo $bar"
3601  * Not a method if it's really "print foo $bar"
3602  * Method if it's really "foo package::" (interpreted as package->foo)
3603  * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3604  * Not a method if bar is a filehandle or package, but is quoted with
3605  *   =>
3606  */
3607
3608 STATIC int
3609 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
3610 {
3611     dVAR;
3612     char *s = start + (*start == '$');
3613     char tmpbuf[sizeof PL_tokenbuf];
3614     STRLEN len;
3615     GV* indirgv;
3616 #ifdef PERL_MAD
3617     int soff;
3618 #endif
3619
3620     PERL_ARGS_ASSERT_INTUIT_METHOD;
3621
3622     if (gv) {
3623         if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
3624             return 0;
3625         if (cv) {
3626             if (SvPOK(cv)) {
3627                 const char *proto = SvPVX_const(cv);
3628                 if (proto) {
3629                     if (*proto == ';')
3630                         proto++;
3631                     if (*proto == '*')
3632                         return 0;
3633                 }
3634             }
3635         } else
3636             gv = NULL;
3637     }
3638     s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
3639     /* start is the beginning of the possible filehandle/object,
3640      * and s is the end of it
3641      * tmpbuf is a copy of it
3642      */
3643
3644     if (*start == '$') {
3645         if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3646                 isUPPER(*PL_tokenbuf))
3647             return 0;
3648 #ifdef PERL_MAD
3649         len = start - SvPVX(PL_linestr);
3650 #endif
3651         s = PEEKSPACE(s);
3652 #ifdef PERL_MAD
3653         start = SvPVX(PL_linestr) + len;
3654 #endif
3655         PL_bufptr = start;
3656         PL_expect = XREF;
3657         return *s == '(' ? FUNCMETH : METHOD;
3658     }
3659     if (!keyword(tmpbuf, len, 0)) {
3660         if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
3661             len -= 2;
3662             tmpbuf[len] = '\0';
3663 #ifdef PERL_MAD
3664             soff = s - SvPVX(PL_linestr);
3665 #endif
3666             goto bare_package;
3667         }
3668         indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
3669         if (indirgv && GvCVu(indirgv))
3670             return 0;
3671         /* filehandle or package name makes it a method */
3672         if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
3673 #ifdef PERL_MAD
3674             soff = s - SvPVX(PL_linestr);
3675 #endif
3676             s = PEEKSPACE(s);
3677             if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
3678                 return 0;       /* no assumptions -- "=>" quotes bearword */
3679       bare_package:
3680             start_force(PL_curforce);
3681             NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
3682                                                   S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
3683             NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
3684             if (PL_madskills)
3685                 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
3686             PL_expect = XTERM;
3687             force_next(WORD);
3688             PL_bufptr = s;
3689 #ifdef PERL_MAD
3690             PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
3691 #endif
3692             return *s == '(' ? FUNCMETH : METHOD;
3693         }
3694     }
3695     return 0;
3696 }
3697
3698 /* Encoded script support. filter_add() effectively inserts a
3699  * 'pre-processing' function into the current source input stream.
3700  * Note that the filter function only applies to the current source file
3701  * (e.g., it will not affect files 'require'd or 'use'd by this one).
3702  *
3703  * The datasv parameter (which may be NULL) can be used to pass
3704  * private data to this instance of the filter. The filter function
3705  * can recover the SV using the FILTER_DATA macro and use it to
3706  * store private buffers and state information.
3707  *
3708  * The supplied datasv parameter is upgraded to a PVIO type
3709  * and the IoDIRP/IoANY field is used to store the function pointer,
3710  * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
3711  * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
3712  * private use must be set using malloc'd pointers.
3713  */
3714
3715 SV *
3716 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
3717 {
3718     dVAR;
3719     if (!funcp)
3720         return NULL;
3721
3722     if (!PL_parser)
3723         return NULL;
3724
3725     if (!PL_rsfp_filters)
3726         PL_rsfp_filters = newAV();
3727     if (!datasv)
3728         datasv = newSV(0);
3729     SvUPGRADE(datasv, SVt_PVIO);
3730     IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
3731     IoFLAGS(datasv) |= IOf_FAKE_DIRP;
3732     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
3733                           FPTR2DPTR(void *, IoANY(datasv)),
3734                           SvPV_nolen(datasv)));
3735     av_unshift(PL_rsfp_filters, 1);
3736     av_store(PL_rsfp_filters, 0, datasv) ;
3737     return(datasv);
3738 }
3739
3740
3741 /* Delete most recently added instance of this filter function. */
3742 void
3743 Perl_filter_del(pTHX_ filter_t funcp)
3744 {
3745     dVAR;
3746     SV *datasv;
3747
3748     PERL_ARGS_ASSERT_FILTER_DEL;
3749
3750 #ifdef DEBUGGING
3751     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
3752                           FPTR2DPTR(void*, funcp)));
3753 #endif
3754     if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
3755         return;
3756     /* if filter is on top of stack (usual case) just pop it off */
3757     datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
3758     if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
3759         IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
3760         IoANY(datasv) = (void *)NULL;
3761         sv_free(av_pop(PL_rsfp_filters));
3762
3763         return;
3764     }
3765     /* we need to search for the correct entry and clear it     */
3766     Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
3767 }
3768
3769
3770 /* Invoke the idxth filter function for the current rsfp.        */
3771 /* maxlen 0 = read one text line */
3772 I32
3773 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
3774 {
3775     dVAR;
3776     filter_t funcp;
3777     SV *datasv = NULL;
3778     /* This API is bad. It should have been using unsigned int for maxlen.
3779        Not sure if we want to change the API, but if not we should sanity
3780        check the value here.  */
3781     const unsigned int correct_length
3782         = maxlen < 0 ?
3783 #ifdef PERL_MICRO
3784         0x7FFFFFFF
3785 #else
3786         INT_MAX
3787 #endif
3788         : maxlen;
3789
3790     PERL_ARGS_ASSERT_FILTER_READ;
3791
3792     if (!PL_parser || !PL_rsfp_filters)
3793         return -1;
3794     if (idx > AvFILLp(PL_rsfp_filters)) {       /* Any more filters?    */
3795         /* Provide a default input filter to make life easy.    */
3796         /* Note that we append to the line. This is handy.      */
3797         DEBUG_P(PerlIO_printf(Perl_debug_log,
3798                               "filter_read %d: from rsfp\n", idx));
3799         if (correct_length) {
3800             /* Want a block */
3801             int len ;
3802             const int old_len = SvCUR(buf_sv);
3803
3804             /* ensure buf_sv is large enough */
3805             SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
3806             if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
3807                                    correct_length)) <= 0) {
3808                 if (PerlIO_error(PL_rsfp))
3809                     return -1;          /* error */
3810                 else
3811                     return 0 ;          /* end of file */
3812             }
3813             SvCUR_set(buf_sv, old_len + len) ;
3814             SvPVX(buf_sv)[old_len + len] = '\0';
3815         } else {
3816             /* Want a line */
3817             if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
3818                 if (PerlIO_error(PL_rsfp))
3819                     return -1;          /* error */
3820                 else
3821                     return 0 ;          /* end of file */
3822             }
3823         }
3824         return SvCUR(buf_sv);
3825     }
3826     /* Skip this filter slot if filter has been deleted */
3827     if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
3828         DEBUG_P(PerlIO_printf(Perl_debug_log,
3829                               "filter_read %d: skipped (filter deleted)\n",
3830                               idx));
3831         return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
3832     }
3833     /* Get function pointer hidden within datasv        */
3834     funcp = DPTR2FPTR(filter_t, IoANY(datasv));
3835     DEBUG_P(PerlIO_printf(Perl_debug_log,
3836                           "filter_read %d: via function %p (%s)\n",
3837                           idx, (void*)datasv, SvPV_nolen_const(datasv)));
3838     /* Call function. The function is expected to       */
3839     /* call "FILTER_READ(idx+1, buf_sv)" first.         */
3840     /* Return: <0:error, =0:eof, >0:not eof             */
3841     return (*funcp)(aTHX_ idx, buf_sv, correct_length);
3842 }
3843
3844 STATIC char *
3845 S_filter_gets(pTHX_ register SV *sv, STRLEN append)
3846 {
3847     dVAR;
3848
3849     PERL_ARGS_ASSERT_FILTER_GETS;
3850
3851 #ifdef PERL_CR_FILTER
3852     if (!PL_rsfp_filters) {
3853         filter_add(S_cr_textfilter,NULL);
3854     }
3855 #endif
3856     if (PL_rsfp_filters) {
3857         if (!append)
3858             SvCUR_set(sv, 0);   /* start with empty line        */
3859         if (FILTER_READ(0, sv, 0) > 0)
3860             return ( SvPVX(sv) ) ;
3861         else
3862             return NULL ;
3863     }
3864     else
3865         return (sv_gets(sv, PL_rsfp, append));
3866 }
3867
3868 STATIC HV *
3869 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
3870 {
3871     dVAR;
3872     GV *gv;
3873
3874     PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
3875
3876     if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
3877         return PL_curstash;
3878
3879     if (len > 2 &&
3880         (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
3881         (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
3882     {
3883         return GvHV(gv);                        /* Foo:: */
3884     }
3885
3886     /* use constant CLASS => 'MyClass' */
3887     gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
3888     if (gv && GvCV(gv)) {
3889         SV * const sv = cv_const_sv(GvCV(gv));
3890         if (sv)
3891             pkgname = SvPV_const(sv, len);
3892     }
3893
3894     return gv_stashpvn(pkgname, len, 0);
3895 }
3896
3897 /*
3898  * S_readpipe_override
3899  * Checks whether readpipe() is overridden and, if so, generates the
3900  * appropriate optree, provided sublex_start() is called afterwards.
3901  */
3902 STATIC void
3903 S_readpipe_override(pTHX)
3904 {
3905     GV **gvp;
3906     GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
3907     pl_yylval.ival = OP_BACKTICK;
3908     if ((gv_readpipe
3909                 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
3910             ||
3911             ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
3912              && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
3913              && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
3914     {
3915         PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
3916             append_elem(OP_LIST,
3917                 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
3918                 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
3919     }
3920 }
3921
3922 #ifdef PERL_MAD
3923  /*
3924  * Perl_madlex
3925  * The intent of this yylex wrapper is to minimize the changes to the
3926  * tokener when we aren't interested in collecting madprops.  It remains
3927  * to be seen how successful this strategy will be...
3928  */
3929
3930 int
3931 Perl_madlex(pTHX)
3932 {
3933     int optype;
3934     char *s = PL_bufptr;
3935
3936     /* make sure PL_thiswhite is initialized */
3937     PL_thiswhite = 0;
3938     PL_thismad = 0;
3939
3940     /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
3941     if (PL_pending_ident)
3942         return S_pending_ident(aTHX);
3943
3944     /* previous token ate up our whitespace? */
3945     if (!PL_lasttoke && PL_nextwhite) {
3946         PL_thiswhite = PL_nextwhite;
3947         PL_nextwhite = 0;
3948     }
3949
3950     /* isolate the token, and figure out where it is without whitespace */
3951     PL_realtokenstart = -1;
3952     PL_thistoken = 0;
3953     optype = yylex();
3954     s = PL_bufptr;
3955     assert(PL_curforce < 0);
3956
3957     if (!PL_thismad || PL_thismad->mad_key == '^') {    /* not forced already? */
3958         if (!PL_thistoken) {
3959             if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
3960                 PL_thistoken = newSVpvs("");
3961             else {
3962                 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
3963                 PL_thistoken = newSVpvn(tstart, s - tstart);
3964             }
3965         }
3966         if (PL_thismad) /* install head */
3967             CURMAD('X', PL_thistoken);
3968     }
3969
3970     /* last whitespace of a sublex? */
3971     if (optype == ')' && PL_endwhite) {
3972         CURMAD('X', PL_endwhite);
3973     }
3974
3975     if (!PL_thismad) {
3976
3977         /* if no whitespace and we're at EOF, bail.  Otherwise fake EOF below. */
3978         if (!PL_thiswhite && !PL_endwhite && !optype) {
3979             sv_free(PL_thistoken);
3980             PL_thistoken = 0;
3981             return 0;
3982         }
3983
3984         /* put off final whitespace till peg */
3985         if (optype == ';' && !PL_rsfp) {
3986             PL_nextwhite = PL_thiswhite;
3987             PL_thiswhite = 0;
3988         }
3989         else if (PL_thisopen) {
3990             CURMAD('q', PL_thisopen);
3991             if (PL_thistoken)
3992                 sv_free(PL_thistoken);
3993             PL_thistoken = 0;
3994         }
3995         else {
3996             /* Store actual token text as madprop X */
3997             CURMAD('X', PL_thistoken);
3998         }
3999
4000         if (PL_thiswhite) {
4001             /* add preceding whitespace as madprop _ */
4002             CURMAD('_', PL_thiswhite);
4003         }
4004
4005         if (PL_thisstuff) {
4006             /* add quoted material as madprop = */
4007             CURMAD('=', PL_thisstuff);
4008         }
4009
4010         if (PL_thisclose) {
4011             /* add terminating quote as madprop Q */
4012             CURMAD('Q', PL_thisclose);
4013         }
4014     }
4015
4016     /* special processing based on optype */
4017
4018     switch (optype) {
4019
4020     /* opval doesn't need a TOKEN since it can already store mp */
4021     case WORD:
4022     case METHOD:
4023     case FUNCMETH:
4024     case THING:
4025     case PMFUNC:
4026     case PRIVATEREF:
4027     case FUNC0SUB:
4028     case UNIOPSUB:
4029     case LSTOPSUB:
4030         if (pl_yylval.opval)
4031             append_madprops(PL_thismad, pl_yylval.opval, 0);
4032         PL_thismad = 0;
4033         return optype;
4034
4035     /* fake EOF */
4036     case 0:
4037         optype = PEG;
4038         if (PL_endwhite) {
4039             addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
4040             PL_endwhite = 0;
4041         }
4042         break;
4043
4044     case ']':
4045     case '}':
4046         if (PL_faketokens)
4047             break;
4048         /* remember any fake bracket that lexer is about to discard */
4049         if (PL_lex_brackets == 1 &&
4050             ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
4051         {
4052             s = PL_bufptr;
4053             while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4054                 s++;
4055             if (*s == '}') {
4056                 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
4057                 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4058                 PL_thiswhite = 0;
4059                 PL_bufptr = s - 1;
4060                 break;  /* don't bother looking for trailing comment */
4061             }
4062             else
4063                 s = PL_bufptr;
4064         }
4065         if (optype == ']')
4066             break;
4067         /* FALLTHROUGH */
4068
4069     /* attach a trailing comment to its statement instead of next token */
4070     case ';':
4071         if (PL_faketokens)
4072             break;
4073         if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
4074             s = PL_bufptr;
4075             while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4076                 s++;
4077             if (*s == '\n' || *s == '#') {
4078                 while (s < PL_bufend && *s != '\n')
4079                     s++;
4080                 if (s < PL_bufend)
4081                     s++;
4082                 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
4083                 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4084                 PL_thiswhite = 0;
4085                 PL_bufptr = s;
4086             }
4087         }
4088         break;
4089
4090     /* pval */
4091     case LABEL:
4092         break;
4093
4094     /* ival */
4095     default:
4096         break;
4097
4098     }
4099
4100     /* Create new token struct.  Note: opvals return early above. */
4101     pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
4102     PL_thismad = 0;
4103     return optype;
4104 }
4105 #endif
4106
4107 STATIC char *
4108 S_tokenize_use(pTHX_ int is_use, char *s) {
4109     dVAR;
4110
4111     PERL_ARGS_ASSERT_TOKENIZE_USE;
4112
4113     if (PL_expect != XSTATE)
4114         yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4115                     is_use ? "use" : "no"));
4116     s = SKIPSPACE1(s);
4117     if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4118         s = force_version(s, TRUE);
4119         if (*s == ';' || *s == '}'
4120                 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
4121             start_force(PL_curforce);
4122             NEXTVAL_NEXTTOKE.opval = NULL;
4123             force_next(WORD);
4124         }
4125         else if (*s == 'v') {
4126             s = force_word(s,WORD,FALSE,TRUE,FALSE);
4127             s = force_version(s, FALSE);
4128         }
4129     }
4130     else {
4131         s = force_word(s,WORD,FALSE,TRUE,FALSE);
4132         s = force_version(s, FALSE);
4133     }
4134     pl_yylval.ival = is_use;
4135     return s;
4136 }
#ifdef DEBUGGING
    /* Printable names of the lexer expectation states.  The DEBUG_T trace
     * at the top of yylex indexes this table with PL_expect. */
    static const char* const exp_name[] = {
        "OPERATOR",      /* 0 */
        "TERM",          /* 1 */
        "REF",           /* 2 */
        "STATE",         /* 3 */
        "BLOCK",         /* 4 */
        "ATTRBLOCK",     /* 5 */
        "ATTRTERM",      /* 6 */
        "TERMBLOCK",     /* 7 */
        "TERMORDORDOR"   /* 8 */
    };
#endif
4143
4144 /*
4145   yylex
4146
4147   Works out what to call the token just pulled out of the input
4148   stream.  The yacc parser takes care of taking the ops we return and
4149   stitching them into a tree.
4150
4151   Returns:
4152     PRIVATEREF
4153
4154   Structure:
4155       if read an identifier
4156           if we're in a my declaration
4157               croak if they tried to say my($foo::bar)
4158               build the ops for a my() declaration
4159           if it's an access to a my() variable
4160               are we in a sort block?
4161                   croak if my($a); $a <=> $b
4162               build ops for access to a my() variable
4163           if in a dq string, and they've said @foo and we can't find @foo
4164               croak
4165           build ops for a bareword
4166       if we already built the token before, use it.
4167 */
4168
4169
4170 #ifdef __SC__
4171 #pragma segment Perl_yylex
4172 #endif
4173 int
4174 Perl_yylex(pTHX)
4175 {
4176     dVAR;
4177     register char *s = PL_bufptr;
4178     register char *d;
4179     STRLEN len;
4180     bool bof = FALSE;
4181     U32 fake_eof = 0;
4182
4183     /* orig_keyword, gvp, and gv are initialized here because
4184      * jump to the label just_a_word_zero can bypass their
4185      * initialization later. */
4186     I32 orig_keyword = 0;
4187     GV *gv = NULL;
4188     GV **gvp = NULL;
4189
4190     DEBUG_T( {
4191         SV* tmp = newSVpvs("");
4192         PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4193             (IV)CopLINE(PL_curcop),
4194             lex_state_names[PL_lex_state],
4195             exp_name[PL_expect],
4196             pv_display(tmp, s, strlen(s), 0, 60));
4197         SvREFCNT_dec(tmp);
4198     } );
4199     /* check if there's an identifier for us to look at */
4200     if (PL_pending_ident)
4201         return REPORT(S_pending_ident(aTHX));
4202
4203     /* no identifier pending identification */
4204
4205     switch (PL_lex_state) {
4206 #ifdef COMMENTARY
4207     case LEX_NORMAL:            /* Some compilers will produce faster */
4208     case LEX_INTERPNORMAL:      /* code if we comment these out. */
4209         break;
4210 #endif
4211
4212     /* when we've already built the next token, just pull it out of the queue */
4213     case LEX_KNOWNEXT:
4214 #ifdef PERL_MAD
4215         PL_lasttoke--;
4216         pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
4217         if (PL_madskills) {
4218             PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
4219             PL_nexttoke[PL_lasttoke].next_mad = 0;
4220             if (PL_thismad && PL_thismad->mad_key == '_') {
4221                 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
4222                 PL_thismad->mad_val = 0;
4223                 mad_free(PL_thismad);
4224                 PL_thismad = 0;
4225             }
4226         }
4227         if (!PL_lasttoke) {
4228             PL_lex_state = PL_lex_defer;
4229             PL_expect = PL_lex_expect;
4230             PL_lex_defer = LEX_NORMAL;
4231             if (!PL_nexttoke[PL_lasttoke].next_type)
4232                 return yylex();
4233         }
4234 #else
4235         PL_nexttoke--;
4236         pl_yylval = PL_nextval[PL_nexttoke];
4237         if (!PL_nexttoke) {
4238             PL_lex_state = PL_lex_defer;
4239             PL_expect = PL_lex_expect;
4240             PL_lex_defer = LEX_NORMAL;
4241         }
4242 #endif
4243 #ifdef PERL_MAD
4244         /* FIXME - can these be merged?  */
4245         return(PL_nexttoke[PL_lasttoke].next_type);
4246 #else
4247         return REPORT(PL_nexttype[PL_nexttoke]);
4248 #endif
4249
4250     /* interpolated case modifiers like \L \U, including \Q and \E.
4251        when we get here, PL_bufptr is at the \
4252     */
4253     case LEX_INTERPCASEMOD:
4254 #ifdef DEBUGGING
4255         if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
4256             Perl_croak(aTHX_ "panic: INTERPCASEMOD");
4257 #endif
4258         /* handle \E or end of string */
4259         if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
4260             /* if at a \E */
4261             if (PL_lex_casemods) {
4262                 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
4263                 PL_lex_casestack[PL_lex_casemods] = '\0';
4264
4265                 if (PL_bufptr != PL_bufend
4266                     && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
4267                     PL_bufptr += 2;
4268                     PL_lex_state = LEX_INTERPCONCAT;
4269 #ifdef PERL_MAD
4270                     if (PL_madskills)
4271                         PL_thistoken = newSVpvs("\\E");
4272 #endif
4273                 }
4274                 return REPORT(')');
4275             }
4276 #ifdef PERL_MAD
4277             while (PL_bufptr != PL_bufend &&
4278               PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
4279                 if (!PL_thiswhite)
4280                     PL_thiswhite = newSVpvs("");
4281                 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
4282                 PL_bufptr += 2;
4283             }
4284 #else
4285             if (PL_bufptr != PL_bufend)
4286                 PL_bufptr += 2;
4287 #endif
4288             PL_lex_state = LEX_INTERPCONCAT;
4289             return yylex();
4290         }
4291         else {
4292             DEBUG_T({ PerlIO_printf(Perl_debug_log,
4293               "### Saw case modifier\n"); });
4294             s = PL_bufptr + 1;
4295             if (s[1] == '\\' && s[2] == 'E') {
4296 #ifdef PERL_MAD
4297                 if (!PL_thiswhite)
4298                     PL_thiswhite = newSVpvs("");
4299                 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
4300 #endif
4301                 PL_bufptr = s + 3;
4302                 PL_lex_state = LEX_INTERPCONCAT;
4303                 return yylex();
4304             }
4305             else {
4306                 I32 tmp;
4307                 if (!PL_madskills) /* when just compiling don't need correct */
4308                     if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4309                         tmp = *s, *s = s[2], s[2] = (char)tmp;  /* misordered... */
4310                 if ((*s == 'L' || *s == 'U') &&
4311                     (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
4312                     PL_lex_casestack[--PL_lex_casemods] = '\0';
4313                     return REPORT(')');
4314                 }
4315                 if (PL_lex_casemods > 10)
4316                     Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4317                 PL_lex_casestack[PL_lex_casemods++] = *s;
4318                 PL_lex_casestack[PL_lex_casemods] = '\0';
4319                 PL_lex_state = LEX_INTERPCONCAT;
4320                 start_force(PL_curforce);
4321                 NEXTVAL_NEXTTOKE.ival = 0;
4322                 force_next('(');
4323                 start_force(PL_curforce);
4324                 if (*s == 'l')
4325                     NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4326                 else if (*s == 'u')
4327                     NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4328                 else if (*s == 'L')
4329                     NEXTVAL_NEXTTOKE.ival = OP_LC;
4330                 else if (*s == 'U')
4331                     NEXTVAL_NEXTTOKE.ival = OP_UC;
4332                 else if (*s == 'Q')
4333                     NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4334                 else
4335                     Perl_croak(aTHX_ "panic: yylex");
4336                 if (PL_madskills) {
4337                     SV* const tmpsv = newSVpvs("\\ ");
4338                     /* replace the space with the character we want to escape
4339                      */
4340                     SvPVX(tmpsv)[1] = *s;
4341                     curmad('_', tmpsv);
4342                 }
4343                 PL_bufptr = s + 1;
4344             }
4345             force_next(FUNC);
4346             if (PL_lex_starts) {
4347                 s = PL_bufptr;
4348                 PL_lex_starts = 0;
4349 #ifdef PERL_MAD
4350                 if (PL_madskills) {
4351                     if (PL_thistoken)
4352                         sv_free(PL_thistoken);
4353                     PL_thistoken = newSVpvs("");
4354                 }
4355 #endif
4356                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4357                 if (PL_lex_casemods == 1 && PL_lex_inpat)
4358                     OPERATOR(',');
4359                 else
4360                     Aop(OP_CONCAT);
4361             }
4362             else
4363                 return yylex();
4364         }
4365
4366     case LEX_INTERPPUSH:
4367         return REPORT(sublex_push());
4368
4369     case LEX_INTERPSTART:
4370         if (PL_bufptr == PL_bufend)
4371             return REPORT(sublex_done());
4372         DEBUG_T({ PerlIO_printf(Perl_debug_log,
4373               "### Interpolated variable\n"); });
4374         PL_expect = XTERM;
4375         PL_lex_dojoin = (*PL_bufptr == '@');
4376         PL_lex_state = LEX_INTERPNORMAL;
4377         if (PL_lex_dojoin) {
4378             start_force(PL_curforce);
4379             NEXTVAL_NEXTTOKE.ival = 0;
4380             force_next(',');
4381             start_force(PL_curforce);
4382             force_ident("\"", '$');
4383             start_force(PL_curforce);
4384             NEXTVAL_NEXTTOKE.ival = 0;
4385             force_next('$');
4386             start_force(PL_curforce);
4387             NEXTVAL_NEXTTOKE.ival = 0;
4388             force_next('(');
4389             start_force(PL_curforce);
4390             NEXTVAL_NEXTTOKE.ival = OP_JOIN;    /* emulate join($", ...) */
4391             force_next(FUNC);
4392         }
4393         if (PL_lex_starts++) {
4394             s = PL_bufptr;
4395 #ifdef PERL_MAD
4396             if (PL_madskills) {
4397                 if (PL_thistoken)
4398                     sv_free(PL_thistoken);
4399                 PL_thistoken = newSVpvs("");
4400             }
4401 #endif
4402             /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4403             if (!PL_lex_casemods && PL_lex_inpat)
4404                 OPERATOR(',');
4405             else
4406                 Aop(OP_CONCAT);
4407         }
4408         return yylex();
4409
4410     case LEX_INTERPENDMAYBE:
4411         if (intuit_more(PL_bufptr)) {
4412             PL_lex_state = LEX_INTERPNORMAL;    /* false alarm, more expr */
4413             break;
4414         }
4415         /* FALL THROUGH */
4416
4417     case LEX_INTERPEND:
4418         if (PL_lex_dojoin) {
4419             PL_lex_dojoin = FALSE;
4420             PL_lex_state = LEX_INTERPCONCAT;
4421 #ifdef PERL_MAD
4422             if (PL_madskills) {
4423                 if (PL_thistoken)
4424                     sv_free(PL_thistoken);
4425                 PL_thistoken = newSVpvs("");
4426             }
4427 #endif
4428             return REPORT(')');
4429         }
4430         if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
4431             && SvEVALED(PL_lex_repl))
4432         {
4433             if (PL_bufptr != PL_bufend)
4434                 Perl_croak(aTHX_ "Bad evalled substitution pattern");
4435             PL_lex_repl = NULL;
4436         }
4437         /* FALLTHROUGH */
4438     case LEX_INTERPCONCAT:
4439 #ifdef DEBUGGING
4440         if (PL_lex_brackets)
4441             Perl_croak(aTHX_ "panic: INTERPCONCAT");
4442 #endif
4443         if (PL_bufptr == PL_bufend)
4444             return REPORT(sublex_done());
4445
4446         if (SvIVX(PL_linestr) == '\'') {
4447             SV *sv = newSVsv(PL_linestr);
4448             if (!PL_lex_inpat)
4449                 sv = tokeq(sv);
4450             else if ( PL_hints & HINT_NEW_RE )
4451                 sv = new_constant(NULL, 0, "qr", sv, sv, "q", 1);
4452             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
4453             s = PL_bufend;
4454         }
4455         else {
4456             s = scan_const(PL_bufptr);
4457             if (*s == '\\')
4458                 PL_lex_state = LEX_INTERPCASEMOD;
4459             else
4460                 PL_lex_state = LEX_INTERPSTART;
4461         }
4462
4463         if (s != PL_bufptr) {
4464             start_force(PL_curforce);
4465             if (PL_madskills) {
4466                 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
4467             }
4468             NEXTVAL_NEXTTOKE = pl_yylval;
4469             PL_expect = XTERM;
4470             force_next(THING);
4471             if (PL_lex_starts++) {
4472 #ifdef PERL_MAD
4473                 if (PL_madskills) {
4474                     if (PL_thistoken)
4475                         sv_free(PL_thistoken);
4476                     PL_thistoken = newSVpvs("");
4477                 }
4478 #endif
4479                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4480                 if (!PL_lex_casemods && PL_lex_inpat)
4481                     OPERATOR(',');
4482                 else
4483                     Aop(OP_CONCAT);
4484             }
4485             else {
4486                 PL_bufptr = s;
4487                 return yylex();
4488             }
4489         }
4490
4491         return yylex();
4492     case LEX_FORMLINE:
4493         PL_lex_state = LEX_NORMAL;
4494         s = scan_formline(PL_bufptr);
4495         if (!PL_lex_formbrack)
4496             goto rightbracket;
4497         OPERATOR(';');
4498     }
4499
4500     s = PL_bufptr;
4501     PL_oldoldbufptr = PL_oldbufptr;
4502     PL_oldbufptr = s;
4503
4504   retry:
4505 #ifdef PERL_MAD
4506     if (PL_thistoken) {
4507         sv_free(PL_thistoken);
4508         PL_thistoken = 0;
4509     }
4510     PL_realtokenstart = s - SvPVX(PL_linestr);  /* assume but undo on ws */
4511 #endif
4512     switch (*s) {
4513     default:
4514         if (isIDFIRST_lazy_if(s,UTF))
4515             goto keylookup;
4516         {
4517         unsigned char c = *s;
4518         len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
4519         if (len > UNRECOGNIZED_PRECEDE_COUNT) {
4520             d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
4521         } else {
4522             d = PL_linestart;
4523         }
4524         *s = '\0';
4525         Perl_croak(aTHX_ "Unrecognized character \\x%02X; marked by <-- HERE after %s<-- HERE near column %d", c, d, (int) len + 1);
4526     }
4527     case 4:
4528     case 26:
4529         goto fake_eof;                  /* emulate EOF on ^D or ^Z */
4530     case 0:
4531 #ifdef PERL_MAD
4532         if (PL_madskills)
4533             PL_faketokens = 0;
4534 #endif
4535         if (!PL_rsfp) {
4536             PL_last_uni = 0;
4537             PL_last_lop = 0;
4538             if (PL_lex_brackets) {
4539                 yyerror((const char *)
4540                         (PL_lex_formbrack
4541                          ? "Format not terminated"
4542                          : "Missing right curly or square bracket"));
4543             }
4544             DEBUG_T( { PerlIO_printf(Perl_debug_log,
4545                         "### Tokener got EOF\n");
4546             } );
4547             TOKEN(0);
4548         }
4549         if (s++ < PL_bufend)
4550             goto retry;                 /* ignore stray nulls */
4551         PL_last_uni = 0;
4552         PL_last_lop = 0;
4553         if (!PL_in_eval && !PL_preambled) {
4554             PL_preambled = TRUE;
4555 #ifdef PERL_MAD
4556             if (PL_madskills)
4557                 PL_faketokens = 1;
4558 #endif
4559             if (PL_perldb) {
4560                 /* Generate a string of Perl code to load the debugger.
4561                  * If PERL5DB is set, it will return the contents of that,
4562                  * otherwise a compile-time require of perl5db.pl.  */
4563
4564                 const char * const pdb = PerlEnv_getenv("PERL5DB");
4565
4566                 if (pdb) {
4567                     sv_setpv(PL_linestr, pdb);
4568                     sv_catpvs(PL_linestr,";");
4569                 } else {
4570                     SETERRNO(0,SS_NORMAL);
4571                     sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
4572                 }
4573             } else
4574                 sv_setpvs(PL_linestr,"");
4575             if (PL_preambleav) {
4576                 SV **svp = AvARRAY(PL_preambleav);
4577                 SV **const end = svp + AvFILLp(PL_preambleav);
4578                 while(svp <= end) {
4579                     sv_catsv(PL_linestr, *svp);
4580                     ++svp;
4581                     sv_catpvs(PL_linestr, ";");
4582                 }
4583                 sv_free(MUTABLE_SV(PL_preambleav));
4584                 PL_preambleav = NULL;
4585             }
4586             if (PL_minus_E)
4587                 sv_catpvs(PL_linestr,
4588                           "use feature ':5." STRINGIFY(PERL_VERSION) "';");
4589             if (PL_minus_n || PL_minus_p) {
4590                 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
4591                 if (PL_minus_l)
4592                     sv_catpvs(PL_linestr,"chomp;");
4593                 if (PL_minus_a) {
4594                     if (PL_minus_F) {
4595                         if ((*PL_splitstr == '/' || *PL_splitstr == '\''
4596                              || *PL_splitstr == '"')
4597                               && strchr(PL_splitstr + 1, *PL_splitstr))
4598                             Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
4599                         else {
4600                             /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
4601                                bytes can be used as quoting characters.  :-) */
4602                             const char *splits = PL_splitstr;
4603                             sv_catpvs(PL_linestr, "our @F=split(q\0");
4604                             do {
4605                                 /* Backslashes must be doubled: emit an extra \ first */
4606                                 if (*splits == '\\')
4607                                     sv_catpvn(PL_linestr, splits, 1);
4608                                 sv_catpvn(PL_linestr, splits, 1);
4609                             } while (*splits++);
4610                             /* This loop will append the trailing NUL of
4611                                PL_splitstr to PL_linestr as the last thing it
4612                                does before terminating.  */
4613                             sv_catpvs(PL_linestr, ");");
4614                         }
4615                     }
4616                     else
4617                         sv_catpvs(PL_linestr,"our @F=split(' ');");
4618                 }
4619             }
4620             sv_catpvs(PL_linestr, "\n");
4621             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4622             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4623             PL_last_lop = PL_last_uni = NULL;
4624             if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
4625                 update_debugger_info(PL_linestr, NULL, 0);
4626             goto retry;
4627         }
4628         do {
4629             fake_eof = 0;
4630             bof = PL_rsfp ? TRUE : FALSE;
4631             if (0) {
4632               fake_eof:
4633                 fake_eof = LEX_FAKE_EOF;
4634             }
4635             PL_bufptr = PL_bufend;
4636             CopLINE_inc(PL_curcop);
4637             if (!lex_next_chunk(fake_eof)) {
4638                 CopLINE_dec(PL_curcop);
4639                 s = PL_bufptr;
4640                 TOKEN(';');     /* not infinite loop because rsfp is NULL now */
4641             }
4642             CopLINE_dec(PL_curcop);
4643 #ifdef PERL_MAD
4644             if (!PL_rsfp)
4645                 PL_realtokenstart = -1;
4646 #endif
4647             s = PL_bufptr;
4648             /* If it looks like the start of a BOM or raw UTF-16,
4649              * check if it in fact is. */
4650             if (bof && PL_rsfp &&
4651                      (*s == 0 ||
4652                       *(U8*)s == 0xEF ||
4653                       *(U8*)s >= 0xFE ||
4654                       s[1] == 0)) {
4655                 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
4656                 if (bof) {
4657                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4658                     s = swallow_bom((U8*)s);
4659                 }
4660             }
4661             if (PL_doextract) {
4662                 /* Incest with pod. */
4663 #ifdef PERL_MAD
4664                 if (PL_madskills)
4665                     sv_catsv(PL_thiswhite, PL_linestr);
4666 #endif
4667                 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
4668                     sv_setpvs(PL_linestr, "");
4669                     PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4670                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4671                     PL_last_lop = PL_last_uni = NULL;
4672                     PL_doextract = FALSE;
4673                 }
4674             }
4675             if (PL_rsfp)
4676                 incline(s);
4677         } while (PL_doextract);
4678         PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
4679         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4680         PL_last_lop = PL_last_uni = NULL;
4681         if (CopLINE(PL_curcop) == 1) {
4682             while (s < PL_bufend && isSPACE(*s))
4683                 s++;
4684             if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
4685                 s++;
4686 #ifdef PERL_MAD
4687             if (PL_madskills)
4688                 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
4689 #endif
4690             d = NULL;
4691             if (!PL_in_eval) {
4692                 if (*s == '#' && *(s+1) == '!')
4693                     d = s + 2;
4694 #ifdef ALTERNATE_SHEBANG
4695                 else {
4696                     static char const as[] = ALTERNATE_SHEBANG;
4697                     if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
4698                         d = s + (sizeof(as) - 1);
4699                 }
4700 #endif /* ALTERNATE_SHEBANG */
4701             }
4702             if (d) {
4703                 char *ipath;
4704                 char *ipathend;
4705
4706                 while (isSPACE(*d))
4707                     d++;
4708                 ipath = d;
4709                 while (*d && !isSPACE(*d))
4710                     d++;
4711                 ipathend = d;
4712
4713 #ifdef ARG_ZERO_IS_SCRIPT
4714                 if (ipathend > ipath) {
4715                     /*
4716                      * HP-UX (at least) sets argv[0] to the script name,
4717                      * which makes $^X incorrect.  And Digital UNIX and Linux,
4718                      * at least, set argv[0] to the basename of the Perl
4719                      * interpreter. So, having found "#!", we'll set it right.
4720                      */
4721                     SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
4722                                                     SVt_PV)); /* $^X */
4723                     assert(SvPOK(x) || SvGMAGICAL(x));
4724                     if (sv_eq(x, CopFILESV(PL_curcop))) {
4725                         sv_setpvn(x, ipath, ipathend - ipath);
4726                         SvSETMAGIC(x);
4727                     }
4728                     else {
4729                         STRLEN blen;
4730                         STRLEN llen;
4731                         const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
4732                         const char * const lstart = SvPV_const(x,llen);
4733                         if (llen < blen) {
4734                             bstart += blen - llen;
4735                             if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
4736                                 sv_setpvn(x, ipath, ipathend - ipath);
4737                                 SvSETMAGIC(x);
4738                             }
4739                         }
4740                     }
4741                     TAINT_NOT;  /* $^X is always tainted, but that's OK */
4742                 }
4743 #endif /* ARG_ZERO_IS_SCRIPT */
4744
4745                 /*
4746                  * Look for options.
4747                  */
4748                 d = instr(s,"perl -");
4749                 if (!d) {
4750                     d = instr(s,"perl");
4751 #if defined(DOSISH)
4752                     /* avoid getting into infinite loops when shebang
4753                      * line contains "Perl" rather than "perl" */
4754                     if (!d) {
4755                         for (d = ipathend-4; d >= ipath; --d) {
4756                             if ((*d == 'p' || *d == 'P')
4757                                 && !ibcmp(d, "perl", 4))
4758                             {
4759                                 break;
4760                             }
4761                         }
4762                         if (d < ipath)
4763                             d = NULL;
4764                     }
4765 #endif
4766                 }
4767 #ifdef ALTERNATE_SHEBANG
4768                 /*
4769                  * If the ALTERNATE_SHEBANG on this system starts with a
4770                  * character that can be part of a Perl expression, then if
4771                  * we see it but not "perl", we're probably looking at the
4772                  * start of Perl code, not a request to hand off to some
4773                  * other interpreter.  Similarly, if "perl" is there, but
4774                  * not in the first 'word' of the line, we assume the line
4775                  * contains the start of the Perl program.
4776                  */
4777                 if (d && *s != '#') {
4778                     const char *c = ipath;
4779                     while (*c && !strchr("; \t\r\n\f\v#", *c))
4780                         c++;
4781                     if (c < d)
4782                         d = NULL;       /* "perl" not in first word; ignore */
4783                     else
4784                         *s = '#';       /* Don't try to parse shebang line */
4785                 }
4786 #endif /* ALTERNATE_SHEBANG */
4787                 if (!d &&
4788                     *s == '#' &&
4789                     ipathend > ipath &&
4790                     !PL_minus_c &&
4791                     !instr(s,"indir") &&
4792                     instr(PL_origargv[0],"perl"))
4793                 {
4794                     dVAR;
4795                     char **newargv;
4796
4797                     *ipathend = '\0';
4798                     s = ipathend + 1;
4799                     while (s < PL_bufend && isSPACE(*s))
4800                         s++;
4801                     if (s < PL_bufend) {
4802                         Newx(newargv,PL_origargc+3,char*);
4803                         newargv[1] = s;
4804                         while (s < PL_bufend && !isSPACE(*s))
4805                             s++;
4806                         *s = '\0';
4807                         Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
4808                     }
4809                     else
4810                         newargv = PL_origargv;
4811                     newargv[0] = ipath;
4812                     PERL_FPU_PRE_EXEC
4813                     PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
4814                     PERL_FPU_POST_EXEC
4815                     Perl_croak(aTHX_ "Can't exec %s", ipath);
4816                 }
4817                 if (d) {
4818                     while (*d && !isSPACE(*d))
4819                         d++;
4820                     while (SPACE_OR_TAB(*d))
4821                         d++;
4822
4823                     if (*d++ == '-') {
4824                         const bool switches_done = PL_doswitches;
4825                         const U32 oldpdb = PL_perldb;
4826                         const bool oldn = PL_minus_n;
4827                         const bool oldp = PL_minus_p;
4828                         const char *d1 = d;
4829
4830                         do {
4831                             bool baduni = FALSE;
4832                             if (*d1 == 'C') {
4833                                 const char *d2 = d1 + 1;
4834                                 if (parse_unicode_opts((const char **)&d2)
4835                                     != PL_unicode)
4836                                     baduni = TRUE;
4837                             }
4838                             if (baduni || *d1 == 'M' || *d1 == 'm') {
4839                                 const char * const m = d1;
4840                                 while (*d1 && !isSPACE(*d1))
4841                                     d1++;
4842                                 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
4843                                       (int)(d1 - m), m);
4844                             }
4845                             d1 = moreswitches(d1);
4846                         } while (d1);
4847                         if (PL_doswitches && !switches_done) {
4848                             int argc = PL_origargc;
4849                             char **argv = PL_origargv;
4850                             do {
4851                                 argc--,argv++;
4852                             } while (argc && argv[0][0] == '-' && argv[0][1]);
4853                             init_argv_symbols(argc,argv);
4854                         }
4855                         if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
4856                             ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
4857                               /* if we have already added "LINE: while (<>) {",
4858                                  we must not do it again */
4859                         {
4860                             sv_setpvs(PL_linestr, "");
4861                             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4862                             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4863                             PL_last_lop = PL_last_uni = NULL;
4864                             PL_preambled = FALSE;
4865                             if (PERLDB_LINE || PERLDB_SAVESRC)
4866                                 (void)gv_fetchfile(PL_origfilename);
4867                             goto retry;
4868                         }
4869                     }
4870                 }
4871             }
4872         }
4873         if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4874             PL_bufptr = s;
4875             PL_lex_state = LEX_FORMLINE;
4876             return yylex();
4877         }
4878         goto retry;
4879     case '\r':
4880 #ifdef PERL_STRICT_CR
4881         Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4882         Perl_croak(aTHX_
4883       "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
4884 #endif
4885     case ' ': case '\t': case '\f': case 013:
4886 #ifdef PERL_MAD
4887         PL_realtokenstart = -1;
4888         if (!PL_thiswhite)
4889             PL_thiswhite = newSVpvs("");
4890         sv_catpvn(PL_thiswhite, s, 1);
4891 #endif
4892         s++;
4893         goto retry;
4894     case '#':
4895     case '\n':
4896 #ifdef PERL_MAD
4897         PL_realtokenstart = -1;
4898         if (PL_madskills)
4899             PL_faketokens = 0;
4900 #endif
4901         if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
4902             if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
4903                 /* handle eval qq[#line 1 "foo"\n ...] */
4904                 CopLINE_dec(PL_curcop);
4905                 incline(s);
4906             }
4907             if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
4908                 s = SKIPSPACE0(s);
4909                 if (!PL_in_eval || PL_rsfp)
4910                     incline(s);
4911             }
4912             else {
4913                 d = s;
4914                 while (d < PL_bufend && *d != '\n')
4915                     d++;
4916                 if (d < PL_bufend)
4917                     d++;
4918                 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
4919                   Perl_croak(aTHX_ "panic: input overflow");
4920 #ifdef PERL_MAD
4921                 if (PL_madskills)
4922                     PL_thiswhite = newSVpvn(s, d - s);
4923 #endif
4924                 s = d;
4925                 incline(s);
4926             }
4927             if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4928                 PL_bufptr = s;
4929                 PL_lex_state = LEX_FORMLINE;
4930                 return yylex();
4931             }
4932         }
4933         else {
4934 #ifdef PERL_MAD
4935             if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
4936                 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
4937                     PL_faketokens = 0;
4938                     s = SKIPSPACE0(s);
4939                     TOKEN(PEG); /* make sure any #! line is accessible */
4940                 }
4941                 s = SKIPSPACE0(s);
4942             }
4943             else {
4944 /*              if (PL_madskills && PL_lex_formbrack) { */
4945                     d = s;
4946                     while (d < PL_bufend && *d != '\n')
4947                         d++;
4948                     if (d < PL_bufend)
4949                         d++;
4950                     else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
4951                       Perl_croak(aTHX_ "panic: input overflow");
4952                     if (PL_madskills && CopLINE(PL_curcop) >= 1) {
4953                         if (!PL_thiswhite)
4954                             PL_thiswhite = newSVpvs("");
4955                         if (CopLINE(PL_curcop) == 1) {
4956                             sv_setpvs(PL_thiswhite, "");
4957                             PL_faketokens = 0;
4958                         }
4959                         sv_catpvn(PL_thiswhite, s, d - s);
4960                     }
4961                     s = d;
4962 /*              }
4963                 *s = '\0';
4964                 PL_bufend = s; */
4965             }
4966 #else
4967             *s = '\0';
4968             PL_bufend = s;
4969 #endif
4970         }
4971         goto retry;
4972     case '-':
4973         if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
4974             I32 ftst = 0;
4975             char tmp;
4976
4977             s++;
4978             PL_bufptr = s;
4979             tmp = *s++;
4980
4981             while (s < PL_bufend && SPACE_OR_TAB(*s))
4982                 s++;
4983
4984             if (strnEQ(s,"=>",2)) {
4985                 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
4986                 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
4987                 OPERATOR('-');          /* unary minus */
4988             }
4989             PL_last_uni = PL_oldbufptr;
4990             switch (tmp) {
4991             case 'r': ftst = OP_FTEREAD;        break;
4992             case 'w': ftst = OP_FTEWRITE;       break;
4993             case 'x': ftst = OP_FTEEXEC;        break;
4994             case 'o': ftst = OP_FTEOWNED;       break;
4995             case 'R': ftst = OP_FTRREAD;        break;
4996             case 'W': ftst = OP_FTRWRITE;       break;
4997             case 'X': ftst = OP_FTREXEC;        break;
4998             case 'O': ftst = OP_FTROWNED;       break;
4999             case 'e': ftst = OP_FTIS;           break;
5000             case 'z': ftst = OP_FTZERO;         break;
5001             case 's': ftst = OP_FTSIZE;         break;
5002             case 'f': ftst = OP_FTFILE;         break;
5003             case 'd': ftst = OP_FTDIR;          break;
5004             case 'l': ftst = OP_FTLINK;         break;
5005             case 'p': ftst = OP_FTPIPE;         break;
5006             case 'S': ftst = OP_FTSOCK;         break;
5007             case 'u': ftst = OP_FTSUID;         break;
5008             case 'g': ftst = OP_FTSGID;         break;
5009             case 'k': ftst = OP_FTSVTX;         break;
5010             case 'b': ftst = OP_FTBLK;          break;
5011             case 'c': ftst = OP_FTCHR;          break;
5012             case 't': ftst = OP_FTTTY;          break;
5013             case 'T': ftst = OP_FTTEXT;         break;
5014             case 'B': ftst = OP_FTBINARY;       break;
5015             case 'M': case 'A': case 'C':
5016                 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5017                 switch (tmp) {
5018                 case 'M': ftst = OP_FTMTIME;    break;
5019                 case 'A': ftst = OP_FTATIME;    break;
5020                 case 'C': ftst = OP_FTCTIME;    break;
5021                 default:                        break;
5022                 }
5023                 break;
5024             default:
5025                 break;
5026             }
5027             if (ftst) {
5028                 PL_last_lop_op = (OPCODE)ftst;
5029                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5030                         "### Saw file test %c\n", (int)tmp);
5031                 } );
5032                 FTST(ftst);
5033             }
5034             else {
5035                 /* Assume it was a minus followed by a one-letter named
5036                  * subroutine call (or a -bareword), then. */
5037                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5038                         "### '-%c' looked like a file test but was not\n",
5039                         (int) tmp);
5040                 } );
5041                 s = --PL_bufptr;
5042             }
5043         }
5044         {
5045             const char tmp = *s++;
5046             if (*s == tmp) {
5047                 s++;
5048                 if (PL_expect == XOPERATOR)
5049                     TERM(POSTDEC);
5050                 else
5051                     OPERATOR(PREDEC);
5052             }
5053             else if (*s == '>') {
5054                 s++;
5055                 s = SKIPSPACE1(s);
5056                 if (isIDFIRST_lazy_if(s,UTF)) {
5057                     s = force_word(s,METHOD,FALSE,TRUE,FALSE);
5058                     TOKEN(ARROW);
5059                 }
5060                 else if (*s == '$')
5061                     OPERATOR(ARROW);
5062                 else
5063                     TERM(ARROW);
5064             }
5065             if (PL_expect == XOPERATOR)
5066                 Aop(OP_SUBTRACT);
5067             else {
5068                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5069                     check_uni();
5070                 OPERATOR('-');          /* unary minus */
5071             }
5072         }
5073
5074     case '+':
5075         {
5076             const char tmp = *s++;
5077             if (*s == tmp) {
5078                 s++;
5079                 if (PL_expect == XOPERATOR)
5080                     TERM(POSTINC);
5081                 else
5082                     OPERATOR(PREINC);
5083             }
5084             if (PL_expect == XOPERATOR)
5085                 Aop(OP_ADD);
5086             else {
5087                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5088                     check_uni();
5089                 OPERATOR('+');
5090             }
5091         }
5092
5093     case '*':
5094         if (PL_expect != XOPERATOR) {
5095             s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5096             PL_expect = XOPERATOR;
5097             force_ident(PL_tokenbuf, '*');
5098             if (!*PL_tokenbuf)
5099                 PREREF('*');
5100             TERM('*');
5101         }
5102         s++;
5103         if (*s == '*') {
5104             s++;
5105             PWop(OP_POW);
5106         }
5107         Mop(OP_MULTIPLY);
5108
5109     case '%':
5110         if (PL_expect == XOPERATOR) {
5111             ++s;
5112             Mop(OP_MODULO);
5113         }
5114         PL_tokenbuf[0] = '%';
5115         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5116                 sizeof PL_tokenbuf - 1, FALSE);
5117         if (!PL_tokenbuf[1]) {
5118             PREREF('%');
5119         }
5120         PL_pending_ident = '%';
5121         TERM('%');
5122
5123     case '^':
5124         s++;
5125         BOop(OP_BIT_XOR);
5126     case '[':
5127         PL_lex_brackets++;
5128         {
5129             const char tmp = *s++;
5130             OPERATOR(tmp);
5131         }
5132     case '~':
5133         if (s[1] == '~'
5134             && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
5135         {
5136             s += 2;
5137             Eop(OP_SMARTMATCH);
5138         }
5139     case ',':
5140         {
5141             const char tmp = *s++;
5142             OPERATOR(tmp);
5143         }
5144     case ':':
5145         if (s[1] == ':') {
5146             len = 0;
5147             goto just_a_word_zero_gv;
5148         }
5149         s++;
5150         switch (PL_expect) {
5151             OP *attrs;
5152 #ifdef PERL_MAD
5153             I32 stuffstart;
5154 #endif
5155         case XOPERATOR:
5156             if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5157                 break;
5158             PL_bufptr = s;      /* update in case we back off */
5159             if (*s == '=') {
5160                 deprecate(":= for an empty attribute list");
5161             }
5162             goto grabattrs;
5163         case XATTRBLOCK:
5164             PL_expect = XBLOCK;
5165             goto grabattrs;
5166         case XATTRTERM:
5167             PL_expect = XTERMBLOCK;
5168          grabattrs:
5169 #ifdef PERL_MAD
5170             stuffstart = s - SvPVX(PL_linestr) - 1;
5171 #endif
5172             s = PEEKSPACE(s);
5173             attrs = NULL;
5174             while (isIDFIRST_lazy_if(s,UTF)) {
5175                 I32 tmp;
5176                 SV *sv;
5177                 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5178                 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5179                     if (tmp < 0) tmp = -tmp;
5180                     switch (tmp) {
5181                     case KEY_or:
5182                     case KEY_and:
5183                     case KEY_for:
5184                     case KEY_foreach:
5185                     case KEY_unless:
5186                     case KEY_if:
5187                     case KEY_while:
5188                     case KEY_until:
5189                         goto got_attrs;
5190                     default:
5191                         break;
5192                     }
5193                 }
5194                 sv = newSVpvn(s, len);
5195                 if (*d == '(') {
5196                     d = scan_str(d,TRUE,TRUE);
5197                     if (!d) {
5198                         /* MUST advance bufptr here to avoid bogus
5199                            "at end of line" context messages from yyerror().
5200                          */
5201                         PL_bufptr = s + len;
5202                         yyerror("Unterminated attribute parameter in attribute list");
5203                         if (attrs)
5204                             op_free(attrs);
5205                         sv_free(sv);
5206                         return REPORT(0);       /* EOF indicator */
5207                     }
5208                 }
5209                 if (PL_lex_stuff) {
5210                     sv_catsv(sv, PL_lex_stuff);
5211                     attrs = append_elem(OP_LIST, attrs,
5212                                         newSVOP(OP_CONST, 0, sv));
5213                     SvREFCNT_dec(PL_lex_stuff);
5214                     PL_lex_stuff = NULL;
5215                 }
5216                 else {
5217                     if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5218                         sv_free(sv);
5219                         if (PL_in_my == KEY_our) {
5220                             deprecate(":unique");
5221                         }
5222                         else
5223                             Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5224                     }
5225
5226                     /* NOTE: any CV attrs applied here need to be part of
5227                        the CVf_BUILTIN_ATTRS define in cv.h! */
5228                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5229                         sv_free(sv);
5230                         CvLVALUE_on(PL_compcv);
5231                     }
5232                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5233                         sv_free(sv);
5234                         deprecate(":locked");
5235                     }
5236                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5237                         sv_free(sv);
5238                         CvMETHOD_on(PL_compcv);
5239                     }
5240                     /* After we've set the flags, it could be argued that
5241                        we don't need to do the attributes.pm-based setting
5242                        process, and shouldn't bother appending recognized
5243                        flags.  To experiment with that, uncomment the
5244                        following "else".  (Note that's already been
5245                        uncommented.  That keeps the above-applied built-in
5246                        attributes from being intercepted (and possibly
5247                        rejected) by a package's attribute routines, but is
5248                        justified by the performance win for the common case
5249                        of applying only built-in attributes.) */
5250                     else
5251                         attrs = append_elem(OP_LIST, attrs,
5252                                             newSVOP(OP_CONST, 0,
5253                                                     sv));
5254                 }
5255                 s = PEEKSPACE(d);
5256                 if (*s == ':' && s[1] != ':')
5257                     s = PEEKSPACE(s+1);
5258                 else if (s == d)
5259                     break;      /* require real whitespace or :'s */
5260                 /* XXX losing whitespace on sequential attributes here */
5261             }
5262             {
5263                 const char tmp
5264                     = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
5265                 if (*s != ';' && *s != '}' && *s != tmp
5266                     && (tmp != '=' || *s != ')')) {
5267                     const char q = ((*s == '\'') ? '"' : '\'');
5268                     /* If here for an expression, and parsed no attrs, back
5269                        off. */
5270                     if (tmp == '=' && !attrs) {
5271                         s = PL_bufptr;
5272                         break;
5273                     }
5274                     /* MUST advance bufptr here to avoid bogus "at end of line"
5275                        context messages from yyerror().
5276                     */
5277                     PL_bufptr = s;
5278                     yyerror( (const char *)
5279                              (*s
5280                               ? Perl_form(aTHX_ "Invalid separator character "
5281                                           "%c%c%c in attribute list", q, *s, q)
5282                               : "Unterminated attribute list" ) );
5283                     if (attrs)
5284                         op_free(attrs);
5285                     OPERATOR(':');
5286                 }
5287             }
5288         got_attrs:
5289             if (attrs) {
5290                 start_force(PL_curforce);
5291                 NEXTVAL_NEXTTOKE.opval = attrs;
5292                 CURMAD('_', PL_nextwhite);
5293                 force_next(THING);
5294             }
5295 #ifdef PERL_MAD
5296             if (PL_madskills) {
5297                 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
5298                                      (s - SvPVX(PL_linestr)) - stuffstart);
5299             }
5300 #endif
5301             TOKEN(COLONATTR);
5302         }
5303         OPERATOR(':');
5304     case '(':
5305         s++;
5306         if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
5307             PL_oldbufptr = PL_oldoldbufptr;             /* allow print(STDOUT 123) */
5308         else
5309             PL_expect = XTERM;
5310         s = SKIPSPACE1(s);
5311         TOKEN('(');
5312     case ';':
5313         CLINE;
5314         {
5315             const char tmp = *s++;
5316             OPERATOR(tmp);
5317         }
5318     case ')':
5319         {
5320             const char tmp = *s++;
5321             s = SKIPSPACE1(s);
5322             if (*s == '{')
5323                 PREBLOCK(tmp);
5324             TERM(tmp);
5325         }
5326     case ']':
5327         s++;
5328         if (PL_lex_brackets <= 0)
5329             yyerror("Unmatched right square bracket");
5330         else
5331             --PL_lex_brackets;
5332         if (PL_lex_state == LEX_INTERPNORMAL) {
5333             if (PL_lex_brackets == 0) {
5334                 if (*s == '-' && s[1] == '>')
5335                     PL_lex_state = LEX_INTERPENDMAYBE;
5336                 else if (*s != '[' && *s != '{')
5337                     PL_lex_state = LEX_INTERPEND;
5338             }
5339         }
5340         TERM(']');
5341     case '{':
5342       leftbracket:
5343         s++;
5344         if (PL_lex_brackets > 100) {
5345             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5346         }
5347         switch (PL_expect) {
5348         case XTERM:
5349             if (PL_lex_formbrack) {
5350                 s--;
5351                 PRETERMBLOCK(DO);
5352             }
5353             if (PL_oldoldbufptr == PL_last_lop)
5354                 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5355             else
5356                 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5357             OPERATOR(HASHBRACK);
5358         case XOPERATOR:
5359             while (s < PL_bufend && SPACE_OR_TAB(*s))
5360                 s++;
5361             d = s;
5362             PL_tokenbuf[0] = '\0';
5363             if (d < PL_bufend && *d == '-') {
5364                 PL_tokenbuf[0] = '-';
5365                 d++;
5366                 while (d < PL_bufend && SPACE_OR_TAB(*d))
5367                     d++;
5368             }
5369             if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
5370                 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
5371                               FALSE, &len);
5372                 while (d < PL_bufend && SPACE_OR_TAB(*d))
5373                     d++;
5374                 if (*d == '}') {
5375                     const char minus = (PL_tokenbuf[0] == '-');
5376                     s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
5377                     if (minus)
5378                         force_next('-');
5379                 }
5380             }
5381             /* FALL THROUGH */
5382         case XATTRBLOCK:
5383         case XBLOCK:
5384             PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
5385             PL_expect = XSTATE;
5386             break;
5387         case XATTRTERM:
5388         case XTERMBLOCK:
5389             PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5390             PL_expect = XSTATE;
5391             break;
5392         default: {
5393                 const char *t;
5394                 if (PL_oldoldbufptr == PL_last_lop)
5395                     PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5396                 else
5397                     PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5398                 s = SKIPSPACE1(s);
5399                 if (*s == '}') {
5400                     if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5401                         PL_expect = XTERM;
5402                         /* This hack is to get the ${} in the message. */
5403                         PL_bufptr = s+1;
5404                         yyerror("syntax error");
5405                         break;
5406                     }
5407                     OPERATOR(HASHBRACK);
5408                 }
5409                 /* This hack serves to disambiguate a pair of curlies
5410                  * as being a block or an anon hash.  Normally, expectation
5411                  * determines that, but in cases where we're not in a
5412                  * position to expect anything in particular (like inside
5413                  * eval"") we have to resolve the ambiguity.  This code
5414                  * covers the case where the first term in the curlies is a
5415                  * quoted string.  Most other cases need to be explicitly
5416                  * disambiguated by prepending a "+" before the opening
5417                  * curly in order to force resolution as an anon hash.
5418                  *
5419                  * XXX should probably propagate the outer expectation
5420                  * into eval"" to rely less on this hack, but that could
5421                  * potentially break current behavior of eval"".
5422                  * GSAR 97-07-21
5423                  */
5424                 t = s;
5425                 if (*s == '\'' || *s == '"' || *s == '`') {
5426                     /* common case: get past first string, handling escapes */
5427                     for (t++; t < PL_bufend && *t != *s;)
5428                         if (*t++ == '\\' && (*t == '\\' || *t == *s))
5429                             t++;
5430                     t++;
5431                 }
5432                 else if (*s == 'q') {
5433                     if (++t < PL_bufend
5434                         && (!isALNUM(*t)
5435                             || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
5436                                 && !isALNUM(*t))))
5437                     {
5438                         /* skip q//-like construct */
5439                         const char *tmps;
5440                         char open, close, term;
5441                         I32 brackets = 1;
5442
5443                         while (t < PL_bufend && isSPACE(*t))
5444                             t++;
5445                         /* check for q => */
5446                         if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5447                             OPERATOR(HASHBRACK);
5448                         }
5449                         term = *t;
5450                         open = term;
5451                         if (term && (tmps = strchr("([{< )]}> )]}>",term)))
5452                             term = tmps[5];
5453                         close = term;
5454                         if (open == close)
5455                             for (t++; t < PL_bufend; t++) {
5456                                 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
5457                                     t++;
5458                                 else if (*t == open)
5459                                     break;
5460                             }
5461                         else {
5462                             for (t++; t < PL_bufend; t++) {
5463                                 if (*t == '\\' && t+1 < PL_bufend)
5464                                     t++;
5465                                 else if (*t == close && --brackets <= 0)
5466                                     break;
5467                                 else if (*t == open)
5468                                     brackets++;
5469                             }
5470                         }
5471                         t++;
5472                     }
5473                     else
5474                         /* skip plain q word */
5475                         while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5476                              t += UTF8SKIP(t);
5477                 }
5478                 else if (isALNUM_lazy_if(t,UTF)) {
5479                     t += UTF8SKIP(t);
5480                     while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5481                          t += UTF8SKIP(t);
5482                 }
5483                 while (t < PL_bufend && isSPACE(*t))
5484                     t++;
5485                 /* if comma follows first term, call it an anon hash */
5486                 /* XXX it could be a comma expression with loop modifiers */
5487                 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
5488                                    || (*t == '=' && t[1] == '>')))
5489                     OPERATOR(HASHBRACK);
5490                 if (PL_expect == XREF)
5491                     PL_expect = XTERM;
5492                 else {
5493                     PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
5494                     PL_expect = XSTATE;
5495                 }
5496             }
5497             break;
5498         }
5499         pl_yylval.ival = CopLINE(PL_curcop);
5500         if (isSPACE(*s) || *s == '#')
5501             PL_copline = NOLINE;   /* invalidate current command line number */
5502         TOKEN('{');
5503     case '}':
5504       rightbracket:
5505         s++;
5506         if (PL_lex_brackets <= 0)
5507             yyerror("Unmatched right curly bracket");
5508         else
5509             PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
5510         if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
5511             PL_lex_formbrack = 0;
5512         if (PL_lex_state == LEX_INTERPNORMAL) {
5513             if (PL_lex_brackets == 0) {
5514                 if (PL_expect & XFAKEBRACK) {
5515                     PL_expect &= XENUMMASK;
5516                     PL_lex_state = LEX_INTERPEND;
5517                     PL_bufptr = s;
5518 #if 0
5519                     if (PL_madskills) {
5520                         if (!PL_thiswhite)
5521                             PL_thiswhite = newSVpvs("");
5522                         sv_catpvs(PL_thiswhite,"}");
5523                     }
5524 #endif
5525                     return yylex();     /* ignore fake brackets */
5526                 }
5527                 if (*s == '-' && s[1] == '>')
5528                     PL_lex_state = LEX_INTERPENDMAYBE;
5529                 else if (*s != '[' && *s != '{')
5530                     PL_lex_state = LEX_INTERPEND;
5531             }
5532         }
5533         if (PL_expect & XFAKEBRACK) {
5534             PL_expect &= XENUMMASK;
5535             PL_bufptr = s;
5536             return yylex();             /* ignore fake brackets */
5537         }
5538         start_force(PL_curforce);
5539         if (PL_madskills) {
5540             curmad('X', newSVpvn(s-1,1));
5541             CURMAD('_', PL_thiswhite);
5542         }
5543         force_next('}');
5544 #ifdef PERL_MAD
5545         if (!PL_thistoken)
5546             PL_thistoken = newSVpvs("");
5547 #endif
5548         TOKEN(';');
5549     case '&':
5550         s++;
5551         if (*s++ == '&')
5552             AOPERATOR(ANDAND);
5553         s--;
5554         if (PL_expect == XOPERATOR) {
5555             if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
5556                 && isIDFIRST_lazy_if(s,UTF))
5557             {
5558                 CopLINE_dec(PL_curcop);
5559                 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
5560                 CopLINE_inc(PL_curcop);
5561             }
5562             BAop(OP_BIT_AND);
5563         }
5564
5565         s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5566         if (*PL_tokenbuf) {
5567             PL_expect = XOPERATOR;
5568             force_ident(PL_tokenbuf, '&');
5569         }
5570         else
5571             PREREF('&');
5572         pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
5573         TERM('&');
5574
5575     case '|':
5576         s++;
5577         if (*s++ == '|')
5578             AOPERATOR(OROR);
5579         s--;
5580         BOop(OP_BIT_OR);
5581     case '=':
5582         s++;
5583         {
5584             const char tmp = *s++;
5585             if (tmp == '=')
5586                 Eop(OP_EQ);
5587             if (tmp == '>')
5588                 OPERATOR(',');
5589             if (tmp == '~')
5590                 PMop(OP_MATCH);
5591             if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
5592                 && strchr("+-*/%.^&|<",tmp))
5593                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5594                             "Reversed %c= operator",(int)tmp);
5595             s--;
5596             if (PL_expect == XSTATE && isALPHA(tmp) &&
5597                 (s == PL_linestart+1 || s[-2] == '\n') )
5598                 {
5599                     if (PL_in_eval && !PL_rsfp) {
5600                         d = PL_bufend;
5601                         while (s < d) {
5602                             if (*s++ == '\n') {
5603                                 incline(s);
5604                                 if (strnEQ(s,"=cut",4)) {
5605                                     s = strchr(s,'\n');
5606                                     if (s)
5607                                         s++;
5608                                     else
5609                                         s = d;
5610                                     incline(s);
5611                                     goto retry;
5612                                 }
5613                             }
5614                         }
5615                         goto retry;
5616                     }
5617 #ifdef PERL_MAD
5618                     if (PL_madskills) {
5619                         if (!PL_thiswhite)
5620                             PL_thiswhite = newSVpvs("");
5621                         sv_catpvn(PL_thiswhite, PL_linestart,
5622                                   PL_bufend - PL_linestart);
5623                     }
5624 #endif
5625                     s = PL_bufend;
5626                     PL_doextract = TRUE;
5627                     goto retry;
5628                 }
5629         }
5630         if (PL_lex_brackets < PL_lex_formbrack) {
5631             const char *t = s;
5632 #ifdef PERL_STRICT_CR
5633             while (SPACE_OR_TAB(*t))
5634 #else
5635             while (SPACE_OR_TAB(*t) || *t == '\r')
5636 #endif
5637                 t++;
5638             if (*t == '\n' || *t == '#') {
5639                 s--;
5640                 PL_expect = XBLOCK;
5641                 goto leftbracket;
5642             }
5643         }
5644         pl_yylval.ival = 0;
5645         OPERATOR(ASSIGNOP);
5646     case '!':
5647         s++;
5648         {
5649             const char tmp = *s++;
5650             if (tmp == '=') {
5651                 /* was this !=~ where !~ was meant?
5652                  * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
5653
5654                 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
5655                     const char *t = s+1;
5656
5657                     while (t < PL_bufend && isSPACE(*t))
5658                         ++t;
5659
5660                     if (*t == '/' || *t == '?' ||
5661                         ((*t == 'm' || *t == 's' || *t == 'y')
5662                          && !isALNUM(t[1])) ||
5663                         (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
5664                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5665                                     "!=~ should be !~");
5666                 }
5667                 Eop(OP_NE);
5668             }
5669             if (tmp == '~')
5670                 PMop(OP_NOT);
5671         }
5672         s--;
5673         OPERATOR('!');
5674     case '<':
5675         if (PL_expect != XOPERATOR) {
5676             if (s[1] != '<' && !strchr(s,'>'))
5677                 check_uni();
5678             if (s[1] == '<')
5679                 s = scan_heredoc(s);
5680             else
5681                 s = scan_inputsymbol(s);
5682             TERM(sublex_start());
5683         }
5684         s++;
5685         {
5686             char tmp = *s++;
5687             if (tmp == '<')
5688                 SHop(OP_LEFT_SHIFT);
5689             if (tmp == '=') {
5690                 tmp = *s++;
5691                 if (tmp == '>')
5692                     Eop(OP_NCMP);
5693                 s--;
5694                 Rop(OP_LE);
5695             }
5696         }
5697         s--;
5698         Rop(OP_LT);
5699     case '>':
5700         s++;
5701         {
5702             const char tmp = *s++;
5703             if (tmp == '>')
5704                 SHop(OP_RIGHT_SHIFT);
5705             else if (tmp == '=')
5706                 Rop(OP_GE);
5707         }
5708         s--;
5709         Rop(OP_GT);
5710
5711     case '$':
5712         CLINE;
5713
5714         if (PL_expect == XOPERATOR) {
5715             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5716                 return deprecate_commaless_var_list();
5717             }
5718         }
5719
5720         if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
5721             PL_tokenbuf[0] = '@';
5722             s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
5723                            sizeof PL_tokenbuf - 1, FALSE);
5724             if (PL_expect == XOPERATOR)
5725                 no_op("Array length", s);
5726             if (!PL_tokenbuf[1])
5727                 PREREF(DOLSHARP);
5728             PL_expect = XOPERATOR;
5729             PL_pending_ident = '#';
5730             TOKEN(DOLSHARP);
5731         }
5732
5733         PL_tokenbuf[0] = '$';
5734         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5735                        sizeof PL_tokenbuf - 1, FALSE);
5736         if (PL_expect == XOPERATOR)
5737             no_op("Scalar", s);
5738         if (!PL_tokenbuf[1]) {
5739             if (s == PL_bufend)
5740                 yyerror("Final $ should be \\$ or $name");
5741             PREREF('$');
5742         }
5743
5744         /* This kludge not intended to be bulletproof. */
5745         if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
5746             pl_yylval.opval = newSVOP(OP_CONST, 0,
5747                                    newSViv(CopARYBASE_get(&PL_compiling)));
5748             pl_yylval.opval->op_private = OPpCONST_ARYBASE;
5749             TERM(THING);
5750         }
5751
5752         d = s;
5753         {
5754             const char tmp = *s;
5755             if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
5756                 s = SKIPSPACE1(s);
5757
5758             if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
5759                 && intuit_more(s)) {
5760                 if (*s == '[') {
5761                     PL_tokenbuf[0] = '@';
5762                     if (ckWARN(WARN_SYNTAX)) {
5763                         char *t = s+1;
5764
5765                         while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
5766                             t++;
5767                         if (*t++ == ',') {
5768                             PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
5769                             while (t < PL_bufend && *t != ']')
5770                                 t++;
5771                             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5772                                         "Multidimensional syntax %.*s not supported",
5773                                     (int)((t - PL_bufptr) + 1), PL_bufptr);
5774                         }
5775                     }
5776                 }
5777                 else if (*s == '{') {
5778                     char *t;
5779                     PL_tokenbuf[0] = '%';
5780                     if (strEQ(PL_tokenbuf+1, "SIG")  && ckWARN(WARN_SYNTAX)
5781                         && (t = strchr(s, '}')) && (t = strchr(t, '=')))
5782                         {
5783                             char tmpbuf[sizeof PL_tokenbuf];
5784                             do {
5785                                 t++;
5786                             } while (isSPACE(*t));
5787                             if (isIDFIRST_lazy_if(t,UTF)) {
5788                                 STRLEN len;
5789                                 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
5790                                               &len);
5791                                 while (isSPACE(*t))
5792                                     t++;
5793                                 if (*t == ';' && get_cvn_flags(tmpbuf, len, 0))
5794                                     Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5795                                                 "You need to quote \"%s\"",
5796                                                 tmpbuf);
5797                             }
5798                         }
5799                 }
5800             }
5801
5802             PL_expect = XOPERATOR;
5803             if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
5804                 const bool islop = (PL_last_lop == PL_oldoldbufptr);
5805                 if (!islop || PL_last_lop_op == OP_GREPSTART)
5806                     PL_expect = XOPERATOR;
5807                 else if (strchr("$@\"'`q", *s))
5808                     PL_expect = XTERM;          /* e.g. print $fh "foo" */
5809                 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
5810                     PL_expect = XTERM;          /* e.g. print $fh &sub */
5811                 else if (isIDFIRST_lazy_if(s,UTF)) {
5812                     char tmpbuf[sizeof PL_tokenbuf];
5813                     int t2;
5814                     scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5815                     if ((t2 = keyword(tmpbuf, len, 0))) {
5816                         /* binary operators exclude handle interpretations */
5817                         switch (t2) {
5818                         case -KEY_x:
5819                         case -KEY_eq:
5820                         case -KEY_ne:
5821                         case -KEY_gt:
5822                         case -KEY_lt:
5823                         case -KEY_ge:
5824                         case -KEY_le:
5825                         case -KEY_cmp:
5826                             break;
5827                         default:
5828                             PL_expect = XTERM;  /* e.g. print $fh length() */
5829                             break;
5830                         }
5831                     }
5832                     else {
5833                         PL_expect = XTERM;      /* e.g. print $fh subr() */
5834                     }
5835                 }
5836                 else if (isDIGIT(*s))
5837                     PL_expect = XTERM;          /* e.g. print $fh 3 */
5838                 else if (*s == '.' && isDIGIT(s[1]))
5839                     PL_expect = XTERM;          /* e.g. print $fh .3 */
5840                 else if ((*s == '?' || *s == '-' || *s == '+')
5841                          && !isSPACE(s[1]) && s[1] != '=')
5842                     PL_expect = XTERM;          /* e.g. print $fh -1 */
5843                 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
5844                          && s[1] != '/')
5845                     PL_expect = XTERM;          /* e.g. print $fh /.../
5846                                                    XXX except DORDOR operator
5847                                                 */
5848                 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
5849                          && s[2] != '=')
5850                     PL_expect = XTERM;          /* print $fh <<"EOF" */
5851             }
5852         }
5853         PL_pending_ident = '$';
5854         TOKEN('$');
5855
5856     case '@':
5857         if (PL_expect == XOPERATOR)
5858             no_op("Array", s);
5859         PL_tokenbuf[0] = '@';
5860         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
5861         if (!PL_tokenbuf[1]) {
5862             PREREF('@');
5863         }
5864         if (PL_lex_state == LEX_NORMAL)
5865             s = SKIPSPACE1(s);
5866         if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
5867             if (*s == '{')
5868                 PL_tokenbuf[0] = '%';
5869
5870             /* Warn about @ where they meant $. */
5871             if (*s == '[' || *s == '{') {
5872                 if (ckWARN(WARN_SYNTAX)) {
5873                     const char *t = s + 1;
5874                     while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
5875                         t++;
5876                     if (*t == '}' || *t == ']') {
5877                         t++;
5878                         PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
5879                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5880                             "Scalar value %.*s better written as $%.*s",
5881                             (int)(t-PL_bufptr), PL_bufptr,
5882                             (int)(t-PL_bufptr-1), PL_bufptr+1);
5883                     }
5884                 }
5885             }
5886         }
5887         PL_pending_ident = '@';
5888         TERM('@');
5889
5890      case '/':                  /* may be division, defined-or, or pattern */
5891         if (PL_expect == XTERMORDORDOR && s[1] == '/') {
5892             s += 2;
5893             AOPERATOR(DORDOR);
5894         }
5895      case '?':                  /* may either be conditional or pattern */
5896         if (PL_expect == XOPERATOR) {
5897              char tmp = *s++;
5898              if(tmp == '?') {
5899                 OPERATOR('?');
5900              }
5901              else {
5902                  tmp = *s++;
5903                  if(tmp == '/') {
5904                      /* A // operator. */
5905                     AOPERATOR(DORDOR);
5906                  }
5907                  else {
5908                      s--;
5909                      Mop(OP_DIVIDE);
5910                  }
5911              }
5912          }
5913          else {
5914              /* Disable warning on "study /blah/" */
5915              if (PL_oldoldbufptr == PL_last_uni
5916               && (*PL_last_uni != 's' || s - PL_last_uni < 5
5917                   || memNE(PL_last_uni, "study", 5)
5918                   || isALNUM_lazy_if(PL_last_uni+5,UTF)
5919               ))
5920                  check_uni();
5921              s = scan_pat(s,OP_MATCH);
5922              TERM(sublex_start());
5923          }
5924
5925     case '.':
5926         if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
5927 #ifdef PERL_STRICT_CR
5928             && s[1] == '\n'
5929 #else
5930             && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
5931 #endif
5932             && (s == PL_linestart || s[-1] == '\n') )
5933         {
5934             PL_lex_formbrack = 0;
5935             PL_expect = XSTATE;
5936             goto rightbracket;
5937         }
5938         if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
5939             s += 3;
5940             OPERATOR(YADAYADA);
5941         }
5942         if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
5943             char tmp = *s++;
5944             if (*s == tmp) {
5945                 s++;
5946                 if (*s == tmp) {
5947                     s++;
5948                     pl_yylval.ival = OPf_SPECIAL;
5949                 }
5950                 else
5951                     pl_yylval.ival = 0;
5952                 OPERATOR(DOTDOT);
5953             }
5954             Aop(OP_CONCAT);
5955         }
5956         /* FALL THROUGH */
5957     case '0': case '1': case '2': case '3': case '4':
5958     case '5': case '6': case '7': case '8': case '9':
5959         s = scan_num(s, &pl_yylval);
5960         DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
5961         if (PL_expect == XOPERATOR)
5962             no_op("Number",s);
5963         TERM(THING);
5964
5965     case '\'':
5966         s = scan_str(s,!!PL_madskills,FALSE);
5967         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
5968         if (PL_expect == XOPERATOR) {
5969             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5970                 return deprecate_commaless_var_list();
5971             }
5972             else
5973                 no_op("String",s);
5974         }
5975         if (!s)
5976             missingterm(NULL);
5977         pl_yylval.ival = OP_CONST;
5978         TERM(sublex_start());
5979
5980     case '"':
5981         s = scan_str(s,!!PL_madskills,FALSE);
5982         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
5983         if (PL_expect == XOPERATOR) {
5984             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5985                 return deprecate_commaless_var_list();
5986             }
5987             else
5988                 no_op("String",s);
5989         }
5990         if (!s)
5991             missingterm(NULL);
5992         pl_yylval.ival = OP_CONST;
5993         /* FIXME. I think that this can be const if char *d is replaced by
5994            more localised variables.  */
5995         for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
5996             if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
5997                 pl_yylval.ival = OP_STRINGIFY;
5998                 break;
5999             }
6000         }
6001         TERM(sublex_start());
6002
6003     case '`':
6004         s = scan_str(s,!!PL_madskills,FALSE);
6005         DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
6006         if (PL_expect == XOPERATOR)
6007             no_op("Backticks",s);
6008         if (!s)
6009             missingterm(NULL);
6010         readpipe_override();
6011         TERM(sublex_start());
6012
6013     case '\\':
6014         s++;
6015         if (PL_lex_inwhat && isDIGIT(*s))
6016             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6017                            *s, *s);
6018         if (PL_expect == XOPERATOR)
6019             no_op("Backslash",s);
6020         OPERATOR(REFGEN);
6021
6022     case 'v':
6023         if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
6024             char *start = s + 2;
6025             while (isDIGIT(*start) || *start == '_')
6026                 start++;
6027             if (*start == '.' && isDIGIT(start[1])) {
6028                 s = scan_num(s, &pl_yylval);
6029                 TERM(THING);
6030             }
6031             /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6032             else if (!isALPHA(*start) && (PL_expect == XTERM
6033                         || PL_expect == XREF || PL_expect == XSTATE
6034                         || PL_expect == XTERMORDORDOR)) {
6035                 GV *const gv = gv_fetchpvn_flags(s, start - s, 0, SVt_PVCV);
6036                 if (!gv) {
6037                     s = scan_num(s, &pl_yylval);
6038                     TERM(THING);
6039                 }
6040             }
6041         }
6042         goto keylookup;
6043     case 'x':
6044         if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
6045             s++;
6046             Mop(OP_REPEAT);
6047         }
6048         goto keylookup;
6049
6050     case '_':
6051     case 'a': case 'A':
6052     case 'b': case 'B':
6053     case 'c': case 'C':
6054     case 'd': case 'D':
6055     case 'e': case 'E':
6056     case 'f': case 'F':
6057     case 'g': case 'G':
6058     case 'h': case 'H':
6059     case 'i': case 'I':
6060     case 'j': case 'J':
6061     case 'k': case 'K':
6062     case 'l': case 'L':
6063     case 'm': case 'M':
6064     case 'n': case 'N':
6065     case 'o': case 'O':
6066     case 'p': case 'P':
6067     case 'q': case 'Q':
6068     case 'r': case 'R':
6069     case 's': case 'S':
6070     case 't': case 'T':
6071     case 'u': case 'U':
6072               case 'V':
6073     case 'w': case 'W':
6074               case 'X':
6075     case 'y': case 'Y':
6076     case 'z': case 'Z':
6077
6078       keylookup: {
6079         bool anydelim;
6080         I32 tmp;
6081
6082         orig_keyword = 0;
6083         gv = NULL;
6084         gvp = NULL;
6085
6086         PL_bufptr = s;
6087         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6088
6089         /* Some keywords can be followed by any delimiter, including ':' */
6090         anydelim = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) ||
6091                (len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
6092                              (PL_tokenbuf[0] == 'q' &&
6093                               strchr("qwxr", PL_tokenbuf[1])))));
6094
6095         /* x::* is just a word, unless x is "CORE" */
6096         if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
6097             goto just_a_word;
6098
6099         d = s;
6100         while (d < PL_bufend && isSPACE(*d))
6101                 d++;    /* no comments skipped here, or s### is misparsed */
6102
6103         /* Is this a word before a => operator? */
6104         if (*d == '=' && d[1] == '>') {
6105             CLINE;
6106             pl_yylval.opval
6107                 = (OP*)newSVOP(OP_CONST, 0,
6108                                S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6109             pl_yylval.opval->op_private = OPpCONST_BARE;
6110             TERM(WORD);
6111         }
6112
6113         /* Check for plugged-in keyword */
6114         {
6115             OP *o;
6116             int result;
6117             char *saved_bufptr = PL_bufptr;
6118             PL_bufptr = s;
6119             result = CALL_FPTR(PL_keyword_plugin)(aTHX_ PL_tokenbuf, len, &o);
6120             s = PL_bufptr;
6121             if (result == KEYWORD_PLUGIN_DECLINE) {
6122                 /* not a plugged-in keyword */
6123                 PL_bufptr = saved_bufptr;
6124             } else if (result == KEYWORD_PLUGIN_STMT) {
6125                 pl_yylval.opval = o;
6126                 CLINE;
6127                 PL_expect = XSTATE;
6128                 return REPORT(PLUGSTMT);
6129             } else if (result == KEYWORD_PLUGIN_EXPR) {
6130                 pl_yylval.opval = o;
6131                 CLINE;
6132                 PL_expect = XOPERATOR;
6133                 return REPORT(PLUGEXPR);
6134             } else {
6135                 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
6136                                         PL_tokenbuf);
6137             }
6138         }
6139
6140         /* Check for built-in keyword */
6141         tmp = keyword(PL_tokenbuf, len, 0);
6142
6143         /* Is this a label? */
6144         if (!anydelim && PL_expect == XSTATE
6145               && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
6146             s = d + 1;
6147             pl_yylval.pval = CopLABEL_alloc(PL_tokenbuf);
6148             CLINE;
6149             TOKEN(LABEL);
6150         }
6151
6152         if (tmp < 0) {                  /* second-class keyword? */
6153             GV *ogv = NULL;     /* override (winner) */
6154             GV *hgv = NULL;     /* hidden (loser) */
6155             if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
6156                 CV *cv;
6157                 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVCV)) &&
6158                     (cv = GvCVu(gv)))
6159                 {
6160                     if (GvIMPORTED_CV(gv))
6161                         ogv = gv;
6162                     else if (! CvMETHOD(cv))
6163                         hgv = gv;
6164                 }
6165                 if (!ogv &&
6166                     (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
6167                     (gv = *gvp) && isGV_with_GP(gv) &&
6168                     GvCVu(gv) && GvIMPORTED_CV(gv))
6169                 {
6170                     ogv = gv;
6171                 }
6172             }
6173             if (ogv) {
6174                 orig_keyword = tmp;
6175                 tmp = 0;                /* overridden by import or by GLOBAL */
6176             }
6177             else if (gv && !gvp
6178                      && -tmp==KEY_lock  /* XXX generalizable kludge */
6179                      && GvCVu(gv))
6180             {
6181                 tmp = 0;                /* any sub overrides "weak" keyword */
6182             }
6183             else {                      /* no override */
6184                 tmp = -tmp;
6185                 if (tmp == KEY_dump) {
6186                     Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
6187                                    "dump() better written as CORE::dump()");
6188                 }
6189                 gv = NULL;
6190                 gvp = 0;
6191                 if (hgv && tmp != KEY_x && tmp != KEY_CORE)     /* never ambiguous */
6192                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
6193                                    "Ambiguous call resolved as CORE::%s(), %s",
6194                                    GvENAME(hgv), "qualify as such or use &");
6195             }
6196         }
6197
6198       reserved_word:
6199         switch (tmp) {
6200
6201         default:                        /* not a keyword */
6202             /* Trade off - by using this evil construction we can pull the
6203                variable gv into the block labelled keylookup. If not, then
6204                we have to give it function scope so that the goto from the
6205                earlier ':' case doesn't bypass the initialisation.  */
6206             if (0) {
6207             just_a_word_zero_gv:
6208                 gv = NULL;
6209                 gvp = NULL;
6210                 orig_keyword = 0;
6211             }
6212           just_a_word: {
6213                 SV *sv;
6214                 int pkgname = 0;
6215                 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
6216                 OP *rv2cv_op;
6217                 CV *cv;
6218 #ifdef PERL_MAD
6219                 SV *nextPL_nextwhite = 0;
6220 #endif
6221
6222
6223                 /* Get the rest if it looks like a package qualifier */
6224
6225                 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
6226                     STRLEN morelen;
6227                     s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
6228                                   TRUE, &morelen);
6229                     if (!morelen)
6230                         Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
6231                                 *s == '\'' ? "'" : "::");
6232                     len += morelen;
6233                     pkgname = 1;
6234                 }
6235
6236                 if (PL_expect == XOPERATOR) {
6237                     if (PL_bufptr == PL_linestart) {
6238                         CopLINE_dec(PL_curcop);
6239                         Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6240                         CopLINE_inc(PL_curcop);
6241                     }
6242                     else
6243                         no_op("Bareword",s);
6244                 }
6245
6246                 /* Look for a subroutine with this name in current package,
6247                    unless name is "Foo::", in which case Foo is a bareword
6248                    (and a package name). */
6249
6250                 if (len > 2 && !PL_madskills &&
6251                     PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
6252                 {
6253                     if (ckWARN(WARN_BAREWORD)
6254                         && ! gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVHV))
6255                         Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
6256                             "Bareword \"%s\" refers to nonexistent package",
6257                              PL_tokenbuf);
6258                     len -= 2;
6259                     PL_tokenbuf[len] = '\0';
6260                     gv = NULL;
6261                     gvp = 0;
6262                 }
6263                 else {
6264                     if (!gv) {
6265                         /* Mustn't actually add anything to a symbol table.
6266                            But also don't want to "initialise" any placeholder
6267                            constants that might already be there into full
6268                            blown PVGVs with attached PVCV.  */
6269                         gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6270                                                GV_NOADD_NOINIT, SVt_PVCV);
6271                     }
6272                     len = 0;
6273                 }
6274
6275                 /* if we saw a global override before, get the right name */
6276
6277                 if (gvp) {
6278                     sv = newSVpvs("CORE::GLOBAL::");
6279                     sv_catpv(sv,PL_tokenbuf);
6280                 }
6281                 else {
6282                     /* If len is 0, newSVpv does strlen(), which is correct.
6283                        If len is non-zero, then it will be the true length,
6284                        and so the scalar will be created correctly.  */
6285                     sv = newSVpv(PL_tokenbuf,len);
6286                 }
6287 #ifdef PERL_MAD
6288                 if (PL_madskills && !PL_thistoken) {
6289                     char *start = SvPVX(PL_linestr) + PL_realtokenstart;
6290                     PL_thistoken = newSVpvn(start,s - start);
6291                     PL_realtokenstart = s - SvPVX(PL_linestr);
6292                 }
6293 #endif
6294
6295                 /* Presume this is going to be a bareword of some sort. */
6296
6297                 CLINE;
6298                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
6299                 pl_yylval.opval->op_private = OPpCONST_BARE;
6300                 /* UTF-8 package name? */
6301                 if (UTF && !IN_BYTES &&
6302                     is_utf8_string((U8*)SvPVX_const(sv), SvCUR(sv)))
6303                     SvUTF8_on(sv);
6304
6305                 /* And if "Foo::", then that's what it certainly is. */
6306
6307                 if (len)
6308                     goto safe_bareword;
6309
6310                 cv = NULL;
6311                 {
6312                     OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc(sv));
6313                     const_op->op_private = OPpCONST_BARE;
6314                     rv2cv_op = newCVREF(0, const_op);
6315                 }
6316                 if (rv2cv_op->op_type == OP_RV2CV &&
6317                         (rv2cv_op->op_flags & OPf_KIDS)) {
6318                     OP *rv_op = cUNOPx(rv2cv_op)->op_first;
6319                     switch (rv_op->op_type) {
6320                         case OP_CONST: {
6321                             SV *sv = cSVOPx_sv(rv_op);
6322                             if (SvROK(sv) && SvTYPE(SvRV(sv)) == SVt_PVCV)
6323                                 cv = (CV*)SvRV(sv);
6324                         } break;
6325                         case OP_GV: {
6326                             GV *gv = cGVOPx_gv(rv_op);
6327                             CV *maybe_cv = GvCVu(gv);
6328                             if (maybe_cv && SvTYPE((SV*)maybe_cv) == SVt_PVCV)
6329                                 cv = maybe_cv;
6330                         } break;
6331                     }
6332                 }
6333
6334                 /* See if it's the indirect object for a list operator. */
6335
6336                 if (PL_oldoldbufptr &&
6337                     PL_oldoldbufptr < PL_bufptr &&
6338                     (PL_oldoldbufptr == PL_last_lop
6339                      || PL_oldoldbufptr == PL_last_uni) &&
6340                     /* NO SKIPSPACE BEFORE HERE! */
6341                     (PL_expect == XREF ||
6342                      ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
6343                 {
6344                     bool immediate_paren = *s == '(';
6345
6346                     /* (Now we can afford to cross potential line boundary.) */
6347                     s = SKIPSPACE2(s,nextPL_nextwhite);
6348 #ifdef PERL_MAD
6349                     PL_nextwhite = nextPL_nextwhite;    /* assume no & deception */
6350 #endif
6351
6352                     /* Two barewords in a row may indicate method call. */
6353
6354                     if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
6355                         (tmp = intuit_method(s, gv, cv))) {
6356                         op_free(rv2cv_op);
6357                         return REPORT(tmp);
6358                     }
6359
6360                     /* If not a declared subroutine, it's an indirect object. */
6361                     /* (But it's an indir obj regardless for sort.) */
6362                     /* Also, if "_" follows a filetest operator, it's a bareword */
6363
6364                     if (
6365                         ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
6366                          (!cv &&
6367                         (PL_last_lop_op != OP_MAPSTART &&
6368                          PL_last_lop_op != OP_GREPSTART))))
6369                        || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
6370                             && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
6371                        )
6372                     {
6373                         PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
6374                         goto bareword;
6375                     }
6376                 }
6377
6378                 PL_expect = XOPERATOR;
6379 #ifdef PERL_MAD
6380                 if (isSPACE(*s))
6381                     s = SKIPSPACE2(s,nextPL_nextwhite);
6382                 PL_nextwhite = nextPL_nextwhite;
6383 #else
6384                 s = skipspace(s);
6385 #endif
6386
6387                 /* Is this a word before a => operator? */
6388                 if (*s == '=' && s[1] == '>' && !pkgname) {
6389                     op_free(rv2cv_op);
6390                     CLINE;
6391                     sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
6392                     if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
6393                       SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
6394                     TERM(WORD);
6395                 }
6396
6397                 /* If followed by a paren, it's certainly a subroutine. */
6398                 if (*s == '(') {
6399                     CLINE;
6400                     if (cv) {
6401                         d = s + 1;
6402                         while (SPACE_OR_TAB(*d))
6403                             d++;
6404                         if (*d == ')' && (sv = cv_const_sv(cv))) {
6405                             s = d + 1;
6406                             goto its_constant;
6407                         }
6408                     }
6409 #ifdef PERL_MAD
6410                     if (PL_madskills) {
6411                         PL_nextwhite = PL_thiswhite;
6412                         PL_thiswhite = 0;
6413                     }
6414                     start_force(PL_curforce);
6415 #endif
6416                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6417                     PL_expect = XOPERATOR;
6418 #ifdef PERL_MAD
6419                     if (PL_madskills) {
6420                         PL_nextwhite = nextPL_nextwhite;
6421                         curmad('X', PL_thistoken);
6422                         PL_thistoken = newSVpvs("");
6423                     }
6424 #endif
6425                     op_free(rv2cv_op);
6426                     force_next(WORD);
6427                     pl_yylval.ival = 0;
6428                     TOKEN('&');
6429                 }
6430
6431                 /* If followed by var or block, call it a method (unless sub) */
6432
6433                 if ((*s == '$' || *s == '{') && !cv) {
6434                     op_free(rv2cv_op);
6435                     PL_last_lop = PL_oldbufptr;
6436                     PL_last_lop_op = OP_METHOD;
6437                     PREBLOCK(METHOD);
6438                 }
6439
6440                 /* If followed by a bareword, see if it looks like indir obj. */
6441
6442                 if (!orig_keyword
6443                         && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
6444                         && (tmp = intuit_method(s, gv, cv))) {
6445                     op_free(rv2cv_op);
6446                     return REPORT(tmp);
6447                 }
6448
6449                 /* Not a method, so call it a subroutine (if defined) */
6450
6451                 if (cv) {
6452                     if (lastchar == '-')
6453                         Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6454                                          "Ambiguous use of -%s resolved as -&%s()",
6455                                          PL_tokenbuf, PL_tokenbuf);
6456                     /* Check for a constant sub */
6457                     if ((sv = cv_const_sv(cv))) {
6458                   its_constant:
6459                         op_free(rv2cv_op);
6460                         SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
6461                         ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
6462                         pl_yylval.opval->op_private = 0;
6463                         TOKEN(WORD);
6464                     }
6465
6466                     op_free(pl_yylval.opval);
6467                     pl_yylval.opval = rv2cv_op;
6468                     pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
6469                     PL_last_lop = PL_oldbufptr;
6470                     PL_last_lop_op = OP_ENTERSUB;
6471                     /* Is there a prototype? */
6472                     if (
6473 #ifdef PERL_MAD
6474                         cv &&
6475 #endif
6476                         SvPOK(cv))
6477                     {
6478                         STRLEN protolen;
6479                         const char *proto = SvPV_const(MUTABLE_SV(cv), protolen);
6480                         if (!protolen)
6481                             TERM(FUNC0SUB);
6482                         if ((*proto == '$' || *proto == '_') && proto[1] == '\0')
6483                             OPERATOR(UNIOPSUB);
6484                         while (*proto == ';')
6485                             proto++;
6486                         if (*proto == '&' && *s == '{') {
6487                             if (PL_curstash)
6488                                 sv_setpvs(PL_subname, "__ANON__");
6489                             else
6490                                 sv_setpvs(PL_subname, "__ANON__::__ANON__");
6491                             PREBLOCK(LSTOPSUB);
6492                         }
6493                     }
6494 #ifdef PERL_MAD
6495                     {
6496                         if (PL_madskills) {
6497                             PL_nextwhite = PL_thiswhite;
6498                             PL_thiswhite = 0;
6499                         }
6500                         start_force(PL_curforce);
6501                         NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6502                         PL_expect = XTERM;
6503                         if (PL_madskills) {
6504                             PL_nextwhite = nextPL_nextwhite;
6505                             curmad('X', PL_thistoken);
6506                             PL_thistoken = newSVpvs("");
6507                         }
6508                         force_next(WORD);
6509                         TOKEN(NOAMP);
6510                     }
6511                 }
6512
6513                 /* Guess harder when madskills require "best effort". */
6514                 if (PL_madskills && (!gv || !GvCVu(gv))) {
6515                     int probable_sub = 0;
6516                     if (strchr("\"'`$@%0123456789!*+{[<", *s))
6517                         probable_sub = 1;
6518                     else if (isALPHA(*s)) {
6519                         char tmpbuf[1024];
6520                         STRLEN tmplen;
6521                         d = s;
6522                         d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
6523                         if (!keyword(tmpbuf, tmplen, 0))
6524                             probable_sub = 1;
6525                         else {
6526                             while (d < PL_bufend && isSPACE(*d))
6527                                 d++;
6528                             if (*d == '=' && d[1] == '>')
6529                                 probable_sub = 1;
6530                         }
6531                     }
6532                     if (probable_sub) {
6533                         gv = gv_fetchpv(PL_tokenbuf, GV_ADD, SVt_PVCV);
6534                         op_free(pl_yylval.opval);
6535                         pl_yylval.opval = rv2cv_op;
6536                         pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
6537                         PL_last_lop = PL_oldbufptr;
6538                         PL_last_lop_op = OP_ENTERSUB;
6539                         PL_nextwhite = PL_thiswhite;
6540                         PL_thiswhite = 0;
6541                         start_force(PL_curforce);
6542                         NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6543                         PL_expect = XTERM;
6544                         PL_nextwhite = nextPL_nextwhite;
6545                         curmad('X', PL_thistoken);
6546                         PL_thistoken = newSVpvs("");
6547                         force_next(WORD);
6548                         TOKEN(NOAMP);
6549                     }
6550 #else
6551                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6552                     PL_expect = XTERM;
6553                     force_next(WORD);
6554                     TOKEN(NOAMP);
6555 #endif
6556                 }
6557
6558                 /* Call it a bare word */
6559
6560                 if (PL_hints & HINT_STRICT_SUBS)
6561                     pl_yylval.opval->op_private |= OPpCONST_STRICT;
6562                 else {
6563                 bareword:
6564                     /* after "print" and similar functions (corresponding to
6565                      * "F? L" in opcode.pl), whatever wasn't already parsed as
6566                      * a filehandle should be subject to "strict subs".
6567                      * Likewise for the optional indirect-object argument to system
6568                      * or exec, which can't be a bareword */
6569                     if ((PL_last_lop_op == OP_PRINT
6570                             || PL_last_lop_op == OP_PRTF
6571                             || PL_last_lop_op == OP_SAY
6572                             || PL_last_lop_op == OP_SYSTEM
6573                             || PL_last_lop_op == OP_EXEC)
6574                             && (PL_hints & HINT_STRICT_SUBS))
6575                         pl_yylval.opval->op_private |= OPpCONST_STRICT;
6576                     if (lastchar != '-') {
6577                         if (ckWARN(WARN_RESERVED)) {
6578                             d = PL_tokenbuf;
6579                             while (isLOWER(*d))
6580                                 d++;
6581                             if (!*d && !gv_stashpv(PL_tokenbuf, 0))
6582                                 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
6583                                        PL_tokenbuf);
6584                         }
6585                     }
6586                 }
6587                 op_free(rv2cv_op);
6588
6589             safe_bareword:
6590                 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
6591                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6592                                      "Operator or semicolon missing before %c%s",
6593                                      lastchar, PL_tokenbuf);
6594                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6595                                      "Ambiguous use of %c resolved as operator %c",
6596                                      lastchar, lastchar);
6597                 }
6598                 TOKEN(WORD);
6599             }
6600
6601         case KEY___FILE__:
6602             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6603                                         newSVpv(CopFILE(PL_curcop),0));
6604             TERM(THING);
6605
6606         case KEY___LINE__:
6607             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6608                                     Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
6609             TERM(THING);
6610
6611         case KEY___PACKAGE__:
6612             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6613                                         (PL_curstash
6614                                          ? newSVhek(HvNAME_HEK(PL_curstash))
6615                                          : &PL_sv_undef));
6616             TERM(THING);
6617
6618         case KEY___DATA__:
6619         case KEY___END__: {
6620             GV *gv;
6621             if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
6622                 const char *pname = "main";
6623                 if (PL_tokenbuf[2] == 'D')
6624                     pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
6625                 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), GV_ADD,
6626                                 SVt_PVIO);
6627                 GvMULTI_on(gv);
6628                 if (!GvIO(gv))
6629                     GvIOp(gv) = newIO();
6630                 IoIFP(GvIOp(gv)) = PL_rsfp;
6631 #if defined(HAS_FCNTL) && defined(F_SETFD)
6632                 {
6633                     const int fd = PerlIO_fileno(PL_rsfp);
6634                     fcntl(fd,F_SETFD,fd >= 3);
6635                 }
6636 #endif
6637                 /* Mark this internal pseudo-handle as clean */
6638                 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
6639                 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
6640                     IoTYPE(GvIOp(gv)) = IoTYPE_STD;
6641                 else
6642                     IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
6643 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
6644                 /* If the script was opened in binmode, we need to revert
6645                  * it to text mode for compatibility, but only if it has CRs
6646                  * (detected below as a CRLF pair at the end of the buffer).
6646                  * XXX this is a questionable hack at best. */
6647                 if (PL_bufend-PL_bufptr > 2
6648                     && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
6649                 {
6650                     Off_t loc = 0;
6651                     if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
6652                         loc = PerlIO_tell(PL_rsfp);
6653                         (void)PerlIO_seek(PL_rsfp, 0L, 0);
6654                     }
6655 #ifdef NETWARE
6656                         if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
6657 #else
6658                     if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
6659 #endif  /* NETWARE */
6660 #ifdef PERLIO_IS_STDIO /* really? */
6661 #  if defined(__BORLANDC__)
6662                         /* XXX see note in do_binmode() */
6663                         ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
6664 #  endif
6665 #endif
6666                         if (loc > 0)
6667                             PerlIO_seek(PL_rsfp, loc, 0);
6668                     }
6669                 }
6670 #endif
6671 #ifdef PERLIO_LAYERS
6672                 if (!IN_BYTES) {
6673                     if (UTF)
6674                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
6675                     else if (PL_encoding) {
6676                         SV *name;
6677                         dSP;
6678                         ENTER;
6679                         SAVETMPS;
6680                         PUSHMARK(sp);
6681                         EXTEND(SP, 1);
6682                         XPUSHs(PL_encoding);
6683                         PUTBACK;
6684                         call_method("name", G_SCALAR);
6685                         SPAGAIN;
6686                         name = POPs;
6687                         PUTBACK;
6688                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
6689                                             Perl_form(aTHX_ ":encoding(%"SVf")",
6690                                                       SVfARG(name)));
6691                         FREETMPS;
6692                         LEAVE;
6693                     }
6694                 }
6695 #endif
6696 #ifdef PERL_MAD
6697                 if (PL_madskills) {
6698                     if (PL_realtokenstart >= 0) {
6699                         char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
6700                         if (!PL_endwhite)
6701                             PL_endwhite = newSVpvs("");
6702                         sv_catsv(PL_endwhite, PL_thiswhite);
6703                         PL_thiswhite = 0;
6704                         sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
6705                         PL_realtokenstart = -1;
6706                     }
6707                     while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
6708                            != NULL) ;
6709                 }
6710 #endif
6711                 PL_rsfp = NULL;
6712             }
6713             goto fake_eof;
6714         }
6715
6716         case KEY_AUTOLOAD:
6717         case KEY_DESTROY:
6718         case KEY_BEGIN:
6719         case KEY_UNITCHECK:
6720         case KEY_CHECK:
6721         case KEY_INIT:
6722         case KEY_END:
6723             if (PL_expect == XSTATE) {
6724                 s = PL_bufptr;
6725                 goto really_sub;
6726             }
6727             goto just_a_word;
6728
6729         case KEY_CORE:
6730             if (*s == ':' && s[1] == ':') {
6731                 s += 2;
6732                 d = s;
6733                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6734                 if (!(tmp = keyword(PL_tokenbuf, len, 0)))
6735                     Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
6736                 if (tmp < 0)
6737                     tmp = -tmp;
6738                 else if (tmp == KEY_require || tmp == KEY_do)
6739                     /* that's a way to remember we saw "CORE::" */
6740                     orig_keyword = tmp;
6741                 goto reserved_word;
6742             }
6743             goto just_a_word;
6744
6745         case KEY_abs:
6746             UNI(OP_ABS);
6747
6748         case KEY_alarm:
6749             UNI(OP_ALARM);
6750
6751         case KEY_accept:
6752             LOP(OP_ACCEPT,XTERM);
6753
6754         case KEY_and:
6755             OPERATOR(ANDOP);
6756
6757         case KEY_atan2:
6758             LOP(OP_ATAN2,XTERM);
6759
6760         case KEY_bind:
6761             LOP(OP_BIND,XTERM);
6762
6763         case KEY_binmode:
6764             LOP(OP_BINMODE,XTERM);
6765
6766         case KEY_bless:
6767             LOP(OP_BLESS,XTERM);
6768
6769         case KEY_break:
6770             FUN0(OP_BREAK);
6771
6772         case KEY_chop:
6773             UNI(OP_CHOP);
6774
6775         case KEY_continue:
6776             /* When the "switch" feature is enabled, continue has a dual
6777                life as a control operator. */
6778             {
6779                 if (!FEATURE_IS_ENABLED("switch"))
6780                     PREBLOCK(CONTINUE);
6781                 else {
6782                     /* We have to disambiguate the two senses of
6783                       "continue". If the next token is a '{' then
6784                       treat it as the start of a continue block;
6785                       otherwise treat it as a control operator.
6786                      */
6787                     s = skipspace(s);
6788                     if (*s == '{')
6789             PREBLOCK(CONTINUE);
6790                     else
6791                         FUN0(OP_CONTINUE);
6792                 }
6793             }
6794
6795         case KEY_chdir:
6796             /* may use HOME */
6797             (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
6798             UNI(OP_CHDIR);
6799
6800         case KEY_close:
6801             UNI(OP_CLOSE);
6802
6803         case KEY_closedir:
6804             UNI(OP_CLOSEDIR);
6805
6806         case KEY_cmp:
6807             Eop(OP_SCMP);
6808
6809         case KEY_caller:
6810             UNI(OP_CALLER);
6811
6812         case KEY_crypt:
6813 #ifdef FCRYPT
6814             if (!PL_cryptseen) {
6815                 PL_cryptseen = TRUE;
6816                 init_des();
6817             }
6818 #endif
6819             LOP(OP_CRYPT,XTERM);
6820
6821         case KEY_chmod:
6822             LOP(OP_CHMOD,XTERM);
6823
6824         case KEY_chown:
6825             LOP(OP_CHOWN,XTERM);
6826
6827         case KEY_connect:
6828             LOP(OP_CONNECT,XTERM);
6829
6830         case KEY_chr:
6831             UNI(OP_CHR);
6832
6833         case KEY_cos:
6834             UNI(OP_COS);
6835
6836         case KEY_chroot:
6837             UNI(OP_CHROOT);
6838
6839         case KEY_default:
6840             PREBLOCK(DEFAULT);
6841
6842         case KEY_do:
6843             s = SKIPSPACE1(s);
6844             if (*s == '{')
6845                 PRETERMBLOCK(DO);
6846             if (*s != '\'')
6847                 s = force_word(s,WORD,TRUE,TRUE,FALSE);
6848             if (orig_keyword == KEY_do) {
6849                 orig_keyword = 0;
6850                 pl_yylval.ival = 1;
6851             }
6852             else
6853                 pl_yylval.ival = 0;
6854             OPERATOR(DO);
6855
6856         case KEY_die:
6857             PL_hints |= HINT_BLOCK_SCOPE;
6858             LOP(OP_DIE,XTERM);
6859
6860         case KEY_defined:
6861             UNI(OP_DEFINED);
6862
6863         case KEY_delete:
6864             UNI(OP_DELETE);
6865
6866         case KEY_dbmopen:
6867             gv_fetchpvs("AnyDBM_File::ISA", GV_ADDMULTI, SVt_PVAV);
6868             LOP(OP_DBMOPEN,XTERM);
6869
6870         case KEY_dbmclose:
6871             UNI(OP_DBMCLOSE);
6872
6873         case KEY_dump:
6874             s = force_word(s,WORD,TRUE,FALSE,FALSE);
6875             LOOPX(OP_DUMP);
6876
6877         case KEY_else:
6878             PREBLOCK(ELSE);
6879
6880         case KEY_elsif:
6881             pl_yylval.ival = CopLINE(PL_curcop);
6882             OPERATOR(ELSIF);
6883
6884         case KEY_eq:
6885             Eop(OP_SEQ);
6886
6887         case KEY_exists:
6888             UNI(OP_EXISTS);
6889
6890         case KEY_exit:
6891             if (PL_madskills)
6892                 UNI(OP_INT);
6893             UNI(OP_EXIT);
6894
6895         case KEY_eval:
6896             s = SKIPSPACE1(s);
6897             if (*s == '{') { /* block eval */
6898                 PL_expect = XTERMBLOCK;
6899                 UNIBRACK(OP_ENTERTRY);
6900             }
6901             else { /* string eval */
6902                 PL_expect = XTERM;
6903                 UNIBRACK(OP_ENTEREVAL);
6904             }
6905
6906         case KEY_eof:
6907             UNI(OP_EOF);
6908
6909         case KEY_exp:
6910             UNI(OP_EXP);
6911
6912         case KEY_each:
6913             UNI(OP_EACH);
6914
6915         case KEY_exec:
6916             LOP(OP_EXEC,XREF);
6917
6918         case KEY_endhostent:
6919             FUN0(OP_EHOSTENT);
6920
6921         case KEY_endnetent:
6922             FUN0(OP_ENETENT);
6923
6924         case KEY_endservent:
6925             FUN0(OP_ESERVENT);
6926
6927         case KEY_endprotoent:
6928             FUN0(OP_EPROTOENT);
6929
6930         case KEY_endpwent:
6931             FUN0(OP_EPWENT);
6932
6933         case KEY_endgrent:
6934             FUN0(OP_EGRENT);
6935
6936         case KEY_for:
6937         case KEY_foreach:
6938             pl_yylval.ival = CopLINE(PL_curcop);
6939             s = SKIPSPACE1(s);
6940             if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
6941                 char *p = s;
6942 #ifdef PERL_MAD
6943                 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
6944 #endif
6945
6946                 if ((PL_bufend - p) >= 3 &&
6947                     strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
6948                     p += 2;
6949                 else if ((PL_bufend - p) >= 4 &&
6950                     strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
6951                     p += 3;
6952                 p = PEEKSPACE(p);
6953                 if (isIDFIRST_lazy_if(p,UTF)) {
6954                     p = scan_ident(p, PL_bufend,
6955                         PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
6956                     p = PEEKSPACE(p);
6957                 }
6958                 if (*p != '$')
6959                     Perl_croak(aTHX_ "Missing $ on loop variable");
6960 #ifdef PERL_MAD
6961                 s = SvPVX(PL_linestr) + soff;
6962 #endif
6963             }
6964             OPERATOR(FOR);
6965
6966         case KEY_formline:
6967             LOP(OP_FORMLINE,XTERM);
6968
6969         case KEY_fork:
6970             FUN0(OP_FORK);
6971
6972         case KEY_fcntl:
6973             LOP(OP_FCNTL,XTERM);
6974
6975         case KEY_fileno:
6976             UNI(OP_FILENO);
6977
6978         case KEY_flock:
6979             LOP(OP_FLOCK,XTERM);
6980
6981         case KEY_gt:
6982             Rop(OP_SGT);
6983
6984         case KEY_ge:
6985             Rop(OP_SGE);
6986
6987         case KEY_grep:
6988             LOP(OP_GREPSTART, XREF);
6989
6990         case KEY_goto:
6991             s = force_word(s,WORD,TRUE,FALSE,FALSE);
6992             LOOPX(OP_GOTO);
6993
6994         case KEY_gmtime:
6995             UNI(OP_GMTIME);
6996
6997         case KEY_getc:
6998             UNIDOR(OP_GETC);
6999
7000         case KEY_getppid:
7001             FUN0(OP_GETPPID);
7002
7003         case KEY_getpgrp:
7004             UNI(OP_GETPGRP);
7005
7006         case KEY_getpriority:
7007             LOP(OP_GETPRIORITY,XTERM);
7008
7009         case KEY_getprotobyname:
7010             UNI(OP_GPBYNAME);
7011
7012         case KEY_getprotobynumber:
7013             LOP(OP_GPBYNUMBER,XTERM);
7014
7015         case KEY_getprotoent:
7016             FUN0(OP_GPROTOENT);
7017
7018         case KEY_getpwent:
7019             FUN0(OP_GPWENT);
7020
7021         case KEY_getpwnam:
7022             UNI(OP_GPWNAM);
7023
7024         case KEY_getpwuid:
7025             UNI(OP_GPWUID);
7026
7027         case KEY_getpeername:
7028             UNI(OP_GETPEERNAME);
7029
7030         case KEY_gethostbyname:
7031             UNI(OP_GHBYNAME);
7032
7033         case KEY_gethostbyaddr:
7034             LOP(OP_GHBYADDR,XTERM);
7035
7036         case KEY_gethostent:
7037             FUN0(OP_GHOSTENT);
7038
7039         case KEY_getnetbyname:
7040             UNI(OP_GNBYNAME);
7041
7042         case KEY_getnetbyaddr:
7043             LOP(OP_GNBYADDR,XTERM);
7044
7045         case KEY_getnetent:
7046             FUN0(OP_GNETENT);
7047
7048         case KEY_getservbyname:
7049             LOP(OP_GSBYNAME,XTERM);
7050
7051         case KEY_getservbyport:
7052             LOP(OP_GSBYPORT,XTERM);
7053
7054         case KEY_getservent:
7055             FUN0(OP_GSERVENT);
7056
7057         case KEY_getsockname:
7058             UNI(OP_GETSOCKNAME);
7059
7060         case KEY_getsockopt:
7061             LOP(OP_GSOCKOPT,XTERM);
7062
7063         case KEY_getgrent:
7064             FUN0(OP_GGRENT);
7065
7066         case KEY_getgrnam:
7067             UNI(OP_GGRNAM);
7068
7069         case KEY_getgrgid:
7070             UNI(OP_GGRGID);
7071
7072         case KEY_getlogin:
7073             FUN0(OP_GETLOGIN);
7074
7075         case KEY_given:
7076             pl_yylval.ival = CopLINE(PL_curcop);
7077             OPERATOR(GIVEN);
7078
7079         case KEY_glob:
7080             LOP(OP_GLOB,XTERM);
7081
7082         case KEY_hex:
7083             UNI(OP_HEX);
7084
7085         case KEY_if:
7086             pl_yylval.ival = CopLINE(PL_curcop);
7087             OPERATOR(IF);
7088
7089         case KEY_index:
7090             LOP(OP_INDEX,XTERM);
7091
7092         case KEY_int:
7093             UNI(OP_INT);
7094
7095         case KEY_ioctl:
7096             LOP(OP_IOCTL,XTERM);
7097
7098         case KEY_join:
7099             LOP(OP_JOIN,XTERM);
7100
7101         case KEY_keys:
7102             UNI(OP_KEYS);
7103
7104         case KEY_kill:
7105             LOP(OP_KILL,XTERM);
7106
7107         case KEY_last:
7108             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7109             LOOPX(OP_LAST);
7110
7111         case KEY_lc:
7112             UNI(OP_LC);
7113
7114         case KEY_lcfirst:
7115             UNI(OP_LCFIRST);
7116
7117         case KEY_local:
7118             pl_yylval.ival = 0;
7119             OPERATOR(LOCAL);
7120
7121         case KEY_length:
7122             UNI(OP_LENGTH);
7123
7124         case KEY_lt:
7125             Rop(OP_SLT);
7126
7127         case KEY_le:
7128             Rop(OP_SLE);
7129
7130         case KEY_localtime:
7131             UNI(OP_LOCALTIME);
7132
7133         case KEY_log:
7134             UNI(OP_LOG);
7135
7136         case KEY_link:
7137             LOP(OP_LINK,XTERM);
7138
7139         case KEY_listen:
7140             LOP(OP_LISTEN,XTERM);
7141
7142         case KEY_lock:
7143             UNI(OP_LOCK);
7144
7145         case KEY_lstat:
7146             UNI(OP_LSTAT);
7147
7148         case KEY_m:
7149             s = scan_pat(s,OP_MATCH);
7150             TERM(sublex_start());
7151
7152         case KEY_map:
7153             LOP(OP_MAPSTART, XREF);
7154
7155         case KEY_mkdir:
7156             LOP(OP_MKDIR,XTERM);
7157
7158         case KEY_msgctl:
7159             LOP(OP_MSGCTL,XTERM);
7160
7161         case KEY_msgget:
7162             LOP(OP_MSGGET,XTERM);
7163
7164         case KEY_msgrcv:
7165             LOP(OP_MSGRCV,XTERM);
7166
7167         case KEY_msgsnd:
7168             LOP(OP_MSGSND,XTERM);
7169
7170         case KEY_our:
7171         case KEY_my:
7172         case KEY_state:
7173             PL_in_my = (U16)tmp;
7174             s = SKIPSPACE1(s);
7175             if (isIDFIRST_lazy_if(s,UTF)) {
7176 #ifdef PERL_MAD
7177                 char* start = s;
7178 #endif
7179                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
7180                 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
7181                     goto really_sub;
7182                 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
7183                 if (!PL_in_my_stash) {
7184                     char tmpbuf[1024];
7185                     PL_bufptr = s;
7186                     my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
7187                     yyerror(tmpbuf);
7188                 }
7189 #ifdef PERL_MAD
7190                 if (PL_madskills) {     /* just add type to declarator token */
7191                     sv_catsv(PL_thistoken, PL_nextwhite);
7192                     PL_nextwhite = 0;
7193                     sv_catpvn(PL_thistoken, start, s - start);
7194                 }
7195 #endif
7196             }
7197             pl_yylval.ival = 1;
7198             OPERATOR(MY);
7199
7200         case KEY_next:
7201             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7202             LOOPX(OP_NEXT);
7203
7204         case KEY_ne:
7205             Eop(OP_SNE);
7206
7207         case KEY_no:
7208             s = tokenize_use(0, s);
7209             OPERATOR(USE);
7210
7211         case KEY_not:
7212             if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
7213                 FUN1(OP_NOT);
7214             else
7215                 OPERATOR(NOTOP);
7216
7217         case KEY_open:
7218             s = SKIPSPACE1(s);
7219             if (isIDFIRST_lazy_if(s,UTF)) {
7220                 const char *t;
7221                 for (d = s; isALNUM_lazy_if(d,UTF);)
7222                     d++;
7223                 for (t=d; isSPACE(*t);)
7224                     t++;
7225                 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
7226                     /* [perl #16184] */
7227                     && !(t[0] == '=' && t[1] == '>')
7228                 ) {
7229                     int parms_len = (int)(d-s);
7230                     Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
7231                            "Precedence problem: open %.*s should be open(%.*s)",
7232                             parms_len, s, parms_len, s);
7233                 }
7234             }
7235             LOP(OP_OPEN,XTERM);
7236
7237         case KEY_or:
7238             pl_yylval.ival = OP_OR;
7239             OPERATOR(OROP);
7240
7241         case KEY_ord:
7242             UNI(OP_ORD);
7243
7244         case KEY_oct:
7245             UNI(OP_OCT);
7246
7247         case KEY_opendir:
7248             LOP(OP_OPEN_DIR,XTERM);
7249
7250         case KEY_print:
7251             checkcomma(s,PL_tokenbuf,"filehandle");
7252             LOP(OP_PRINT,XREF);
7253
7254         case KEY_printf:
7255             checkcomma(s,PL_tokenbuf,"filehandle");
7256             LOP(OP_PRTF,XREF);
7257
7258         case KEY_prototype:
7259             UNI(OP_PROTOTYPE);
7260
7261         case KEY_push:
7262             LOP(OP_PUSH,XTERM);
7263
7264         case KEY_pop:
7265             UNIDOR(OP_POP);
7266
7267         case KEY_pos:
7268             UNIDOR(OP_POS);
7269
7270         case KEY_pack:
7271             LOP(OP_PACK,XTERM);
7272
7273         case KEY_package:
7274             s = force_word(s,WORD,FALSE,TRUE,FALSE);
7275             s = SKIPSPACE1(s);
7276             s = force_strict_version(s);
7277             PL_lex_expect = XBLOCK;
7278             OPERATOR(PACKAGE);
7279
7280         case KEY_pipe:
7281             LOP(OP_PIPE_OP,XTERM);
7282
7283         case KEY_q:
7284             s = scan_str(s,!!PL_madskills,FALSE);
7285             if (!s)
7286                 missingterm(NULL);
7287             pl_yylval.ival = OP_CONST;
7288             TERM(sublex_start());
7289
7290         case KEY_quotemeta:
7291             UNI(OP_QUOTEMETA);
7292
7293         case KEY_qw:
7294             s = scan_str(s,!!PL_madskills,FALSE);
7295             if (!s)
7296                 missingterm(NULL);
7297             PL_expect = XOPERATOR;
7298             force_next(')');
7299             if (SvCUR(PL_lex_stuff)) {
7300                 OP *words = NULL;
7301                 int warned = 0;
7302                 d = SvPV_force(PL_lex_stuff, len);
7303                 while (len) {
7304                     for (; isSPACE(*d) && len; --len, ++d)
7305                         /**/;
7306                     if (len) {
7307                         SV *sv;
7308                         const char *b = d;
7309                         if (!warned && ckWARN(WARN_QW)) {
7310                             for (; !isSPACE(*d) && len; --len, ++d) {
7311                                 if (*d == ',') {
7312                                     Perl_warner(aTHX_ packWARN(WARN_QW),
7313                                         "Possible attempt to separate words with commas");
7314                                     ++warned;
7315                                 }
7316                                 else if (*d == '#') {
7317                                     Perl_warner(aTHX_ packWARN(WARN_QW),
7318                                         "Possible attempt to put comments in qw() list");
7319                                     ++warned;
7320                                 }
7321                             }
7322                         }
7323                         else {
7324                             for (; !isSPACE(*d) && len; --len, ++d)
7325                                 /**/;
7326                         }
7327                         sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
7328                         words = append_elem(OP_LIST, words,
7329                                             newSVOP(OP_CONST, 0, tokeq(sv)));
7330                     }
7331                 }
7332                 if (words) {
7333                     start_force(PL_curforce);
7334                     NEXTVAL_NEXTTOKE.opval = words;
7335                     force_next(THING);
7336                 }
7337             }
7338             if (PL_lex_stuff) {
7339                 SvREFCNT_dec(PL_lex_stuff);
7340                 PL_lex_stuff = NULL;
7341             }
7342             PL_expect = XTERM;
7343             TOKEN('(');
7344
7345         case KEY_qq:
7346             s = scan_str(s,!!PL_madskills,FALSE);
7347             if (!s)
7348                 missingterm(NULL);
7349             pl_yylval.ival = OP_STRINGIFY;
7350             if (SvIVX(PL_lex_stuff) == '\'')
7351                 SvIV_set(PL_lex_stuff, 0);      /* qq'$foo' should intepolate */
7352             TERM(sublex_start());
7353
7354         case KEY_qr:
7355             s = scan_pat(s,OP_QR);
7356             TERM(sublex_start());
7357
7358         case KEY_qx:
7359             s = scan_str(s,!!PL_madskills,FALSE);
7360             if (!s)
7361                 missingterm(NULL);
7362             readpipe_override();
7363             TERM(sublex_start());
7364
7365         case KEY_return:
7366             OLDLOP(OP_RETURN);
7367
7368         case KEY_require:
7369             s = SKIPSPACE1(s);
7370             if (isDIGIT(*s)) {
7371                 s = force_version(s, FALSE);
7372             }
7373             else if (*s != 'v' || !isDIGIT(s[1])
7374                     || (s = force_version(s, TRUE), *s == 'v'))
7375             {
7376                 *PL_tokenbuf = '\0';
7377                 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7378                 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
7379                     gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), GV_ADD);
7380                 else if (*s == '<')
7381                     yyerror("<> should be quotes");
7382             }
7383             if (orig_keyword == KEY_require) {
7384                 orig_keyword = 0;
7385                 pl_yylval.ival = 1;
7386             }
7387             else
7388                 pl_yylval.ival = 0;
7389             PL_expect = XTERM;
7390             PL_bufptr = s;
7391             PL_last_uni = PL_oldbufptr;
7392             PL_last_lop_op = OP_REQUIRE;
7393             s = skipspace(s);
7394             return REPORT( (int)REQUIRE );
7395
7396         case KEY_reset:
7397             UNI(OP_RESET);
7398
7399         case KEY_redo:
7400             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7401             LOOPX(OP_REDO);
7402
7403         case KEY_rename:
7404             LOP(OP_RENAME,XTERM);
7405
7406         case KEY_rand:
7407             UNI(OP_RAND);
7408
7409         case KEY_rmdir:
7410             UNI(OP_RMDIR);
7411
7412         case KEY_rindex:
7413             LOP(OP_RINDEX,XTERM);
7414
7415         case KEY_read:
7416             LOP(OP_READ,XTERM);
7417
7418         case KEY_readdir:
7419             UNI(OP_READDIR);
7420
7421         case KEY_readline:
7422             UNIDOR(OP_READLINE);
7423
7424         case KEY_readpipe:
7425             UNIDOR(OP_BACKTICK);
7426
7427         case KEY_rewinddir:
7428             UNI(OP_REWINDDIR);
7429
7430         case KEY_recv:
7431             LOP(OP_RECV,XTERM);
7432
7433         case KEY_reverse:
7434             LOP(OP_REVERSE,XTERM);
7435
7436         case KEY_readlink:
7437             UNIDOR(OP_READLINK);
7438
7439         case KEY_ref:
7440             UNI(OP_REF);
7441
7442         case KEY_s:
7443             s = scan_subst(s);
7444             if (pl_yylval.opval)
7445                 TERM(sublex_start());
7446             else
7447                 TOKEN(1);       /* force error */
7448
7449         case KEY_say:
7450             checkcomma(s,PL_tokenbuf,"filehandle");
7451             LOP(OP_SAY,XREF);
7452
7453         case KEY_chomp:
7454             UNI(OP_CHOMP);
7455
7456         case KEY_scalar:
7457             UNI(OP_SCALAR);
7458
7459         case KEY_select:
7460             LOP(OP_SELECT,XTERM);
7461
7462         case KEY_seek:
7463             LOP(OP_SEEK,XTERM);
7464
7465         case KEY_semctl:
7466             LOP(OP_SEMCTL,XTERM);
7467
7468         case KEY_semget:
7469             LOP(OP_SEMGET,XTERM);
7470
7471         case KEY_semop:
7472             LOP(OP_SEMOP,XTERM);
7473
7474         case KEY_send:
7475             LOP(OP_SEND,XTERM);
7476
7477         case KEY_setpgrp:
7478             LOP(OP_SETPGRP,XTERM);
7479
7480         case KEY_setpriority:
7481             LOP(OP_SETPRIORITY,XTERM);
7482
7483         case KEY_sethostent:
7484             UNI(OP_SHOSTENT);
7485
7486         case KEY_setnetent:
7487             UNI(OP_SNETENT);
7488
7489         case KEY_setservent:
7490             UNI(OP_SSERVENT);
7491
7492         case KEY_setprotoent:
7493             UNI(OP_SPROTOENT);
7494
7495         case KEY_setpwent:
7496             FUN0(OP_SPWENT);
7497
7498         case KEY_setgrent:
7499             FUN0(OP_SGRENT);
7500
7501         case KEY_seekdir:
7502             LOP(OP_SEEKDIR,XTERM);
7503
7504         case KEY_setsockopt:
7505             LOP(OP_SSOCKOPT,XTERM);
7506
7507         case KEY_shift:
7508             UNIDOR(OP_SHIFT);
7509
7510         case KEY_shmctl:
7511             LOP(OP_SHMCTL,XTERM);
7512
7513         case KEY_shmget:
7514             LOP(OP_SHMGET,XTERM);
7515
7516         case KEY_shmread:
7517             LOP(OP_SHMREAD,XTERM);
7518
7519         case KEY_shmwrite:
7520             LOP(OP_SHMWRITE,XTERM);
7521
7522         case KEY_shutdown:
7523             LOP(OP_SHUTDOWN,XTERM);
7524
7525         case KEY_sin:
7526             UNI(OP_SIN);
7527
7528         case KEY_sleep:
7529             UNI(OP_SLEEP);
7530
7531         case KEY_socket:
7532             LOP(OP_SOCKET,XTERM);
7533
7534         case KEY_socketpair:
7535             LOP(OP_SOCKPAIR,XTERM);
7536
7537         case KEY_sort:
7538             checkcomma(s,PL_tokenbuf,"subroutine name");
7539             s = SKIPSPACE1(s);
7540             if (*s == ';' || *s == ')')         /* probably a close */
7541                 Perl_croak(aTHX_ "sort is now a reserved word");
7542             PL_expect = XTERM;
7543             s = force_word(s,WORD,TRUE,TRUE,FALSE);
7544             LOP(OP_SORT,XREF);
7545
7546         case KEY_split:
7547             LOP(OP_SPLIT,XTERM);
7548
7549         case KEY_sprintf:
7550             LOP(OP_SPRINTF,XTERM);
7551
7552         case KEY_splice:
7553             LOP(OP_SPLICE,XTERM);
7554
7555         case KEY_sqrt:
7556             UNI(OP_SQRT);
7557
7558         case KEY_srand:
7559             UNI(OP_SRAND);
7560
7561         case KEY_stat:
7562             UNI(OP_STAT);
7563
7564         case KEY_study:
7565             UNI(OP_STUDY);
7566
7567         case KEY_substr:
7568             LOP(OP_SUBSTR,XTERM);
7569
7570         case KEY_format:
7571         case KEY_sub:
7572           really_sub:
7573             {
7574                 char tmpbuf[sizeof PL_tokenbuf];
7575                 SSize_t tboffset = 0;
7576                 expectation attrful;
7577                 bool have_name, have_proto;
7578                 const int key = tmp;
7579
7580 #ifdef PERL_MAD
7581                 SV *tmpwhite = 0;
7582
7583                 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7584                 SV *subtoken = newSVpvn(tstart, s - tstart);
7585                 PL_thistoken = 0;
7586
7587                 d = s;
7588                 s = SKIPSPACE2(s,tmpwhite);
7589 #else
7590                 s = skipspace(s);
7591 #endif
7592
7593                 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
7594                     (*s == ':' && s[1] == ':'))
7595                 {
7596 #ifdef PERL_MAD
7597                     SV *nametoke = NULL;
7598 #endif
7599
7600                     PL_expect = XBLOCK;
7601                     attrful = XATTRBLOCK;
7602                     /* remember buffer pos'n for later force_word */
7603                     tboffset = s - PL_oldbufptr;
7604                     d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
7605 #ifdef PERL_MAD
7606                     if (PL_madskills)
7607                         nametoke = newSVpvn(s, d - s);
7608 #endif
7609                     if (memchr(tmpbuf, ':', len))
7610                         sv_setpvn(PL_subname, tmpbuf, len);
7611                     else {
7612                         sv_setsv(PL_subname,PL_curstname);
7613                         sv_catpvs(PL_subname,"::");
7614                         sv_catpvn(PL_subname,tmpbuf,len);
7615                     }
7616                     have_name = TRUE;
7617
7618 #ifdef PERL_MAD
7619
7620                     start_force(0);
7621                     CURMAD('X', nametoke);
7622                     CURMAD('_', tmpwhite);
7623                     (void) force_word(PL_oldbufptr + tboffset, WORD,
7624                                       FALSE, TRUE, TRUE);
7625
7626                     s = SKIPSPACE2(d,tmpwhite);
7627 #else
7628                     s = skipspace(d);
7629 #endif
7630                 }
7631                 else {
7632                     if (key == KEY_my)
7633                         Perl_croak(aTHX_ "Missing name in \"my sub\"");
7634                     PL_expect = XTERMBLOCK;
7635                     attrful = XATTRTERM;
7636                     sv_setpvs(PL_subname,"?");
7637                     have_name = FALSE;
7638                 }
7639
7640                 if (key == KEY_format) {
7641                     if (*s == '=')
7642                         PL_lex_formbrack = PL_lex_brackets + 1;
7643 #ifdef PERL_MAD
7644                     PL_thistoken = subtoken;
7645                     s = d;
7646 #else
7647                     if (have_name)
7648                         (void) force_word(PL_oldbufptr + tboffset, WORD,
7649                                           FALSE, TRUE, TRUE);
7650 #endif
7651                     OPERATOR(FORMAT);
7652                 }
7653
7654                 /* Look for a prototype */
7655                 if (*s == '(') {
7656                     char *p;
7657                     bool bad_proto = FALSE;
7658                     bool in_brackets = FALSE;
7659                     char greedy_proto = ' ';
7660                     bool proto_after_greedy_proto = FALSE;
7661                     bool must_be_last = FALSE;
7662                     bool underscore = FALSE;
7663                     bool seen_underscore = FALSE;
7664                     const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
7665
7666                     s = scan_str(s,!!PL_madskills,FALSE);
7667                     if (!s)
7668                         Perl_croak(aTHX_ "Prototype not terminated");
7669                     /* strip spaces and check for bad characters */
7670                     d = SvPVX(PL_lex_stuff);
7671                     tmp = 0;
7672                     for (p = d; *p; ++p) {
7673                         if (!isSPACE(*p)) {
7674                             d[tmp++] = *p;
7675
7676                             if (warnillegalproto) {
7677                                 if (must_be_last)
7678                                     proto_after_greedy_proto = TRUE;
7679                                 if (!strchr("$@%*;[]&\\_", *p)) {
7680                                     bad_proto = TRUE;
7681                                 }
7682                                 else {
7683                                     if ( underscore ) {
7684                                         if ( *p != ';' )
7685                                             bad_proto = TRUE;
7686                                         underscore = FALSE;
7687                                     }
7688                                     if ( *p == '[' ) {
7689                                         in_brackets = TRUE;
7690                                     }
7691                                     else if ( *p == ']' ) {
7692                                         in_brackets = FALSE;
7693                                     }
7694                                     else if ( (*p == '@' || *p == '%') &&
7695                                          ( tmp < 2 || d[tmp-2] != '\\' ) &&
7696                                          !in_brackets ) {
7697                                         must_be_last = TRUE;
7698                                         greedy_proto = *p;
7699                                     }
7700                                     else if ( *p == '_' ) {
7701                                         underscore = seen_underscore = TRUE;
7702                                     }
7703                                 }
7704                             }
7705                         }
7706                     }
7707                     d[tmp] = '\0';
7708                     if (proto_after_greedy_proto)
7709                         Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
7710                                     "Prototype after '%c' for %"SVf" : %s",
7711                                     greedy_proto, SVfARG(PL_subname), d);
7712                     if (bad_proto)
7713                         Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
7714                                     "Illegal character %sin prototype for %"SVf" : %s",
7715                                     seen_underscore ? "after '_' " : "",
7716                                     SVfARG(PL_subname), d);
7717                     SvCUR_set(PL_lex_stuff, tmp);
7718                     have_proto = TRUE;
7719
7720 #ifdef PERL_MAD
7721                     start_force(0);
7722                     CURMAD('q', PL_thisopen);
7723                     CURMAD('_', tmpwhite);
7724                     CURMAD('=', PL_thisstuff);
7725                     CURMAD('Q', PL_thisclose);
7726                     NEXTVAL_NEXTTOKE.opval =
7727                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
7728                     PL_lex_stuff = NULL;
7729                     force_next(THING);
7730
7731                     s = SKIPSPACE2(s,tmpwhite);
7732 #else
7733                     s = skipspace(s);
7734 #endif
7735                 }
7736                 else
7737                     have_proto = FALSE;
7738
7739                 if (*s == ':' && s[1] != ':')
7740                     PL_expect = attrful;
7741                 else if (*s != '{' && key == KEY_sub) {
7742                     if (!have_name)
7743                         Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
7744                     else if (*s != ';' && *s != '}')
7745                         Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
7746                 }
7747
7748 #ifdef PERL_MAD
7749                 start_force(0);
7750                 if (tmpwhite) {
7751                     if (PL_madskills)
7752                         curmad('^', newSVpvs(""));
7753                     CURMAD('_', tmpwhite);
7754                 }
7755                 force_next(0);
7756
7757                 PL_thistoken = subtoken;
7758 #else
7759                 if (have_proto) {
7760                     NEXTVAL_NEXTTOKE.opval =
7761                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
7762                     PL_lex_stuff = NULL;
7763                     force_next(THING);
7764                 }
7765 #endif
7766                 if (!have_name) {
7767                     if (PL_curstash)
7768                         sv_setpvs(PL_subname, "__ANON__");
7769                     else
7770                         sv_setpvs(PL_subname, "__ANON__::__ANON__");
7771                     TOKEN(ANONSUB);
7772                 }
7773 #ifndef PERL_MAD
7774                 (void) force_word(PL_oldbufptr + tboffset, WORD,
7775                                   FALSE, TRUE, TRUE);
7776 #endif
7777                 if (key == KEY_my)
7778                     TOKEN(MYSUB);
7779                 TOKEN(SUB);
7780             }
7781
7782         case KEY_system:
7783             LOP(OP_SYSTEM,XREF);
7784
7785         case KEY_symlink:
7786             LOP(OP_SYMLINK,XTERM);
7787
7788         case KEY_syscall:
7789             LOP(OP_SYSCALL,XTERM);
7790
7791         case KEY_sysopen:
7792             LOP(OP_SYSOPEN,XTERM);
7793
7794         case KEY_sysseek:
7795             LOP(OP_SYSSEEK,XTERM);
7796
7797         case KEY_sysread:
7798             LOP(OP_SYSREAD,XTERM);
7799
7800         case KEY_syswrite:
7801             LOP(OP_SYSWRITE,XTERM);
7802
7803         case KEY_tr:
7804             s = scan_trans(s);
7805             TERM(sublex_start());
7806
7807         case KEY_tell:
7808             UNI(OP_TELL);
7809
7810         case KEY_telldir:
7811             UNI(OP_TELLDIR);
7812
7813         case KEY_tie:
7814             LOP(OP_TIE,XTERM);
7815
7816         case KEY_tied:
7817             UNI(OP_TIED);
7818
7819         case KEY_time:
7820             FUN0(OP_TIME);
7821
7822         case KEY_times:
7823             FUN0(OP_TMS);
7824
7825         case KEY_truncate:
7826             LOP(OP_TRUNCATE,XTERM);
7827
7828         case KEY_uc:
7829             UNI(OP_UC);
7830
7831         case KEY_ucfirst:
7832             UNI(OP_UCFIRST);
7833
7834         case KEY_untie:
7835             UNI(OP_UNTIE);
7836
7837         case KEY_until:
7838             pl_yylval.ival = CopLINE(PL_curcop);
7839             OPERATOR(UNTIL);
7840
7841         case KEY_unless:
7842             pl_yylval.ival = CopLINE(PL_curcop);
7843             OPERATOR(UNLESS);
7844
7845         case KEY_unlink:
7846             LOP(OP_UNLINK,XTERM);
7847
7848         case KEY_undef:
7849             UNIDOR(OP_UNDEF);
7850
7851         case KEY_unpack:
7852             LOP(OP_UNPACK,XTERM);
7853
7854         case KEY_utime:
7855             LOP(OP_UTIME,XTERM);
7856
7857         case KEY_umask:
7858             UNIDOR(OP_UMASK);
7859
7860         case KEY_unshift:
7861             LOP(OP_UNSHIFT,XTERM);
7862
7863         case KEY_use:
7864             s = tokenize_use(1, s);
7865             OPERATOR(USE);
7866
7867         case KEY_values:
7868             UNI(OP_VALUES);
7869
7870         case KEY_vec:
7871             LOP(OP_VEC,XTERM);
7872
7873         case KEY_when:
7874             pl_yylval.ival = CopLINE(PL_curcop);
7875             OPERATOR(WHEN);
7876
7877         case KEY_while:
7878             pl_yylval.ival = CopLINE(PL_curcop);
7879             OPERATOR(WHILE);
7880
7881         case KEY_warn:
7882             PL_hints |= HINT_BLOCK_SCOPE;
7883             LOP(OP_WARN,XTERM);
7884
7885         case KEY_wait:
7886             FUN0(OP_WAIT);
7887
7888         case KEY_waitpid:
7889             LOP(OP_WAITPID,XTERM);
7890
7891         case KEY_wantarray:
7892             FUN0(OP_WANTARRAY);
7893
7894         case KEY_write:
7895 #ifdef EBCDIC
7896         {
7897             char ctl_l[2];
7898             ctl_l[0] = toCTRL('L');
7899             ctl_l[1] = '\0';
7900             gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
7901         }
7902 #else
7903             /* Make sure $^L is defined */
7904             gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
7905 #endif
7906             UNI(OP_ENTERWRITE);
7907
7908         case KEY_x:
7909             if (PL_expect == XOPERATOR)
7910                 Mop(OP_REPEAT);
7911             check_uni();
7912             goto just_a_word;
7913
7914         case KEY_xor:
7915             pl_yylval.ival = OP_XOR;
7916             OPERATOR(OROP);
7917
7918         case KEY_y:
7919             s = scan_trans(s);
7920             TERM(sublex_start());
7921         }
7922     }}
7923 }
7924 #ifdef __SC__
7925 #pragma segment Main
7926 #endif
7927
7928 static int
7929 S_pending_ident(pTHX)
7930 {
         /*
          * Turn the identifier currently held in PL_tokenbuf into a token.
          * The first character of PL_tokenbuf is treated as the sigil (see the
          * PL_tokenbuf[0] tests against '$' and '@' further down).
          *
          * Returns:
          *   PRIVATEREF - pl_yylval.opval is an OP_PADANY op whose op_targ is
          *                the pad offset (from allocmy() or pad_findmy()).
          *   WORD       - pl_yylval.opval is an OP_CONST op carrying the name
          *                with its first character stripped (package-qualified
          *                when the pad entry is flagged as "our"), marked
          *                OPpCONST_ENTERED, and the matching glob has been
          *                requested through gv_fetchsv()/gv_fetchpvn_flags().
          *
          * PL_pending_ident is reset to 0 before any return or croak.
          */
7931     dVAR;
7932     register char *d;
7933     PADOFFSET tmp = 0;
7934     /* pit holds the identifier we read and pending_ident is reset */
7935     char pit = PL_pending_ident;
7936     const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
7937     /* All routes through this function want to know if there is a colon.  */
7938     const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
7939     PL_pending_ident = 0;
7940
7941     /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
7942     DEBUG_T({ PerlIO_printf(Perl_debug_log,
7943           "### Pending identifier '%s'\n", PL_tokenbuf); });
7944
7945     /* if we're in a my(), we can't allow dynamics here.
7946        $foo'bar has already been turned into $foo::bar, so
7947        just check for colons.
7948
7949        if it's a legal name, the OP is a PADANY.
7950     */
7951     if (PL_in_my) {
7952         if (PL_in_my == KEY_our) {      /* "our" is merely analogous to "my" */
7953             if (has_colon)
7954                 yyerror(Perl_form(aTHX_ "No package name allowed for "
7955                                   "variable %s in \"our\"",
7956                                   PL_tokenbuf));
                 /* No return in this branch: tmp keeps the offset returned by
                    allocmy() and, when the name has no colon, is consumed by the
                    pad handling below (pad_findmy() is skipped there because
                    PL_in_my is set). */
7957             tmp = allocmy(PL_tokenbuf, tokenbuf_len, 0);
7958         }
7959         else {
                 /* Any other PL_in_my value: the message says "my" for KEY_my
                    and "state" otherwise.  The code after the yyerror() call
                    still allocates the pad entry (assuming yyerror() returns
                    -- TODO confirm). */
7960             if (has_colon)
7961                 yyerror(Perl_form(aTHX_ PL_no_myglob,
7962                             PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf));
7963
7964             pl_yylval.opval = newOP(OP_PADANY, 0);
7965             pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len, 0);
7966             return PRIVATEREF;
7967         }
7968     }
7969
7970     /*
7971        build the ops for accesses to a my() variable.
7972
7973        Deny my($a) or my($b) in a sort block, *if* $a or $b is
7974        then used in a comparison.  This catches most, but not
7975        all cases.  For instance, it catches
7976            sort { my($a); $a <=> $b }
7977        but not
7978            sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
7979        (although why you'd do that is anyone's guess).
7980     */
7981
7982     if (!has_colon) {
             /* Outside a declaration, look the name up in the pad; inside an
                "our" declaration tmp was already set by allocmy() above. */
7983         if (!PL_in_my)
7984             tmp = pad_findmy(PL_tokenbuf, tokenbuf_len, 0);
7985         if (tmp != NOT_IN_PAD) {
7986             /* might be an "our" variable */
7987             if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
7988                 /* build ops for a bareword */
7989                 HV *  const stash = PAD_COMPNAME_OURSTASH(tmp);
7990                 HEK * const stashname = HvNAME_HEK(stash);
7991                 SV *  const sym = newSVhek(stashname);
                     /* sym becomes: stash name, then "::", then PL_tokenbuf
                        without its first character */
7992                 sv_catpvs(sym, "::");
7993                 sv_catpvn(sym, PL_tokenbuf+1, tokenbuf_len - 1);
7994                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
7995                 pl_yylval.opval->op_private = OPpCONST_ENTERED;
                     /* The requested SV type follows the first character of
                        PL_tokenbuf: '$' selects SVt_PV, '@' selects SVt_PVAV,
                        anything else SVt_PVHV.  The result of gv_fetchsv() is
                        not used here. */
7996                 gv_fetchsv(sym,
7997                     (PL_in_eval
7998                         ? (GV_ADDMULTI | GV_ADDINEVAL)
7999                         : GV_ADDMULTI
8000                     ),
8001                     ((PL_tokenbuf[0] == '$') ? SVt_PV
8002                      : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8003                      : SVt_PVHV));
8004                 return WORD;
8005             }
8006
8007             /* if it's a sort block and they're naming $a or $b */
8008             if (PL_last_lop_op == OP_SORT &&
8009                 PL_tokenbuf[0] == '$' &&
8010                 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
8011                 && !PL_tokenbuf[2])
8012             {
                     /* Scan forward from PL_oldoldbufptr (when PL_in_eval is
                        set) or from PL_linestart, stopping at the next newline
                        or at PL_bufend, and croak as soon as "<=>" or "cmp"
                        is seen. */
8013                 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
8014                      d < PL_bufend && *d != '\n';
8015                      d++)
8016                 {
8017                     if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
8018                         Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
8019                               PL_tokenbuf);
8020                     }
8021                 }
8022             }
8023
                 /* Ordinary pad variable: hand the pad offset to the parser. */
8024             pl_yylval.opval = newOP(OP_PADANY, 0);
8025             pl_yylval.opval->op_targ = tmp;
8026             return PRIVATEREF;
8027         }
8028     }
8029
8030     /*
8031        Whine if they've said @foo in a doublequoted string,
8032        and @foo isn't a variable we can find in the symbol
8033        table.
8034     */
         /* The check only runs when the saved pending-ident character is '@',
            PL_lex_state is not LEX_NORMAL and PL_lex_brackets is zero.  The
            glob is fetched with flags 0 here, unlike the GV_ADD* fetches
            elsewhere in this function (presumably a lookup that does not
            create the symbol -- TODO confirm). */
8035     if (ckWARN(WARN_AMBIGUOUS) &&
8036         pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
8037         GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1, 0,
8038                                          SVt_PVAV);
8039         if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
8040                 /* DO NOT warn for @- and @+ */
8041                 && !( PL_tokenbuf[2] == '\0' &&
8042                     ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
8043            )
8044         {
8045             /* Downgraded from fatal to warning 20000522 mjd */
8046             Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8047                         "Possible unintended interpolation of %s in string",
8048                         PL_tokenbuf);
8049         }
8050     }
8051
8052     /* build ops for a bareword */
         /* The constant holds PL_tokenbuf without its first character.  The
            glob is then fetched with GV_ADDMULTI|GV_ADDINEVAL when PL_in_eval
            is set and with GV_ADD otherwise; the SV type is picked from the
            first character as above, and the fetch result is not used. */
8053     pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn(PL_tokenbuf + 1,
8054                                                       tokenbuf_len - 1));
8055     pl_yylval.opval->op_private = OPpCONST_ENTERED;
8056     gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
8057                      PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD,
8058                      ((PL_tokenbuf[0] == '$') ? SVt_PV
8059                       : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8060                       : SVt_PVHV));
8061     return WORD;
8062 }
8063
8064 /*
8065  *  The following code was generated by perl_keyword.pl.
8066  */
8067
8068 I32
8069 Perl_keyword (pTHX_ const char *name, I32 len, bool all_keywords)
8070 {
8071     dVAR;
8072
8073     PERL_ARGS_ASSERT_KEYWORD;
8074
8075   switch (len)
8076   {
8077     case 1: /* 5 tokens of length 1 */
8078       switch (name[0])
8079       {
8080         case 'm':
8081           {                                       /* m          */
8082             return KEY_m;
8083           }
8084
8085         case 'q':
8086           {                                       /* q          */
8087             return KEY_q;
8088           }
8089
8090         case 's':
8091           {                                       /* s          */
8092             return KEY_s;
8093           }
8094
8095         case 'x':
8096           {                                       /* x          */
8097             return -KEY_x;
8098           }
8099
8100         case 'y':
8101           {                                       /* y          */
8102             return KEY_y;
8103           }
8104
8105         default:
8106           goto unknown;
8107       }
8108
8109     case 2: /* 18 tokens of length 2 */
8110       switch (name[0])
8111       {
8112         case 'd':
8113           if (name[1] == 'o')
8114           {                                       /* do         */
8115             return KEY_do;
8116           }
8117
8118           goto unknown;
8119
8120         case 'e':
8121           if (name[1] == 'q')
8122           {                                       /* eq         */
8123             return -KEY_eq;
8124           }
8125
8126           goto unknown;
8127
8128         case 'g':
8129           switch (name[1])
8130           {
8131             case 'e':
8132               {                                   /* ge         */
8133                 return -KEY_ge;
8134               }
8135
8136             case 't':
8137               {                                   /* gt         */
8138                 return -KEY_gt;
8139               }
8140
8141             default:
8142               goto unknown;
8143           }
8144
8145         case 'i':
8146           if (name[1] == 'f')
8147           {                                       /* if         */
8148             return KEY_if;
8149           }
8150
8151           goto unknown;
8152
8153         case 'l':
8154           switch (name[1])
8155           {
8156             case 'c':
8157               {                                   /* lc         */
8158                 return -KEY_lc;
8159               }
8160
8161             case 'e':
8162               {                                   /* le         */
8163                 return -KEY_le;
8164               }
8165
8166             case 't':
8167               {                                   /* lt         */
8168                 return -KEY_lt;
8169               }
8170
8171             default:
8172               goto unknown;
8173           }
8174
8175         case 'm':
8176           if (name[1] == 'y')
8177           {                                       /* my         */
8178             return KEY_my;
8179           }
8180
8181           goto unknown;
8182
8183         case 'n':
8184           switch (name[1])
8185           {
8186             case 'e':
8187               {                                   /* ne         */
8188                 return -KEY_ne;
8189               }
8190
8191             case 'o':
8192               {                                   /* no         */
8193                 return KEY_no;
8194               }
8195
8196             default:
8197               goto unknown;
8198           }
8199
8200         case 'o':
8201           if (name[1] == 'r')
8202           {                                       /* or         */
8203             return -KEY_or;
8204           }
8205
8206           goto unknown;
8207
8208         case 'q':
8209           switch (name[1])
8210           {
8211             case 'q':
8212               {                                   /* qq         */
8213                 return KEY_qq;
8214               }
8215
8216             case 'r':
8217               {                                   /* qr         */
8218                 return KEY_qr;
8219               }
8220
8221             case 'w':
8222               {                                   /* qw         */
8223                 return KEY_qw;
8224               }
8225
8226             case 'x':
8227               {                                   /* qx         */
8228                 return KEY_qx;
8229               }
8230
8231             default:
8232               goto unknown;
8233           }
8234
8235         case 't':
8236           if (name[1] == 'r')
8237           {                                       /* tr         */
8238             return KEY_tr;
8239           }
8240
8241           goto unknown;
8242
8243         case 'u':
8244           if (name[1] == 'c')
8245           {                                       /* uc         */
8246             return -KEY_uc;
8247           }
8248
8249           goto unknown;
8250
8251         default:
8252           goto unknown;
8253       }
8254
8255     case 3: /* 29 tokens of length 3 */
8256       switch (name[0])
8257       {
8258         case 'E':
8259           if (name[1] == 'N' &&
8260               name[2] == 'D')
8261           {                                       /* END        */
8262             return KEY_END;
8263           }
8264
8265           goto unknown;
8266
8267         case 'a':
8268           switch (name[1])
8269           {
8270             case 'b':
8271               if (name[2] == 's')
8272               {                                   /* abs        */
8273                 return -KEY_abs;
8274               }
8275
8276               goto unknown;
8277
8278             case 'n':
8279               if (name[2] == 'd')
8280               {                                   /* and        */
8281                 return -KEY_and;
8282               }
8283
8284               goto unknown;
8285
8286             default:
8287               goto unknown;
8288           }
8289
8290         case 'c':
8291           switch (name[1])
8292           {
8293             case 'h':
8294               if (name[2] == 'r')
8295               {                                   /* chr        */
8296                 return -KEY_chr;
8297               }
8298
8299               goto unknown;
8300
8301             case 'm':
8302               if (name[2] == 'p')
8303               {                                   /* cmp        */
8304                 return -KEY_cmp;
8305               }
8306
8307               goto unknown;
8308
8309             case 'o':
8310               if (name[2] == 's')
8311               {                                   /* cos        */
8312                 return -KEY_cos;
8313               }
8314
8315               goto unknown;
8316
8317             default:
8318               goto unknown;
8319           }
8320
8321         case 'd':
8322           if (name[1] == 'i' &&
8323               name[2] == 'e')
8324           {                                       /* die        */
8325             return -KEY_die;
8326           }
8327
8328           goto unknown;
8329
8330         case 'e':
8331           switch (name[1])
8332           {
8333             case 'o':
8334               if (name[2] == 'f')
8335               {                                   /* eof        */
8336                 return -KEY_eof;
8337               }
8338
8339               goto unknown;
8340
8341             case 'x':
8342               if (name[2] == 'p')
8343               {                                   /* exp        */
8344                 return -KEY_exp;
8345               }
8346
8347               goto unknown;
8348
8349             default:
8350               goto unknown;
8351           }
8352
8353         case 'f':
8354           if (name[1] == 'o' &&
8355               name[2] == 'r')
8356           {                                       /* for        */
8357             return KEY_for;
8358           }
8359
8360           goto unknown;
8361
8362         case 'h':
8363           if (name[1] == 'e' &&
8364               name[2] == 'x')
8365           {                                       /* hex        */
8366             return -KEY_hex;
8367           }
8368
8369           goto unknown;
8370
8371         case 'i':
8372           if (name[1] == 'n' &&
8373               name[2] == 't')
8374           {                                       /* int        */
8375             return -KEY_int;
8376           }
8377
8378           goto unknown;
8379
8380         case 'l':
8381           if (name[1] == 'o' &&
8382               name[2] == 'g')
8383           {                                       /* log        */
8384             return -KEY_log;
8385           }
8386
8387           goto unknown;
8388
8389         case 'm':
8390           if (name[1] == 'a' &&
8391               name[2] == 'p')
8392           {                                       /* map        */
8393             return KEY_map;
8394           }
8395
8396           goto unknown;
8397
8398         case 'n':
8399           if (name[1] == 'o' &&
8400               name[2] == 't')
8401           {                                       /* not        */
8402             return -KEY_not;
8403           }
8404
8405           goto unknown;
8406
8407         case 'o':
8408           switch (name[1])
8409           {
8410             case 'c':
8411               if (name[2] == 't')
8412               {                                   /* oct        */
8413                 return -KEY_oct;
8414               }
8415
8416               goto unknown;
8417
8418             case 'r':
8419               if (name[2] == 'd')
8420               {                                   /* ord        */
8421                 return -KEY_ord;
8422               }
8423
8424               goto unknown;
8425
8426             case 'u':
8427               if (name[2] == 'r')
8428               {                                   /* our        */
8429                 return KEY_our;
8430               }
8431
8432               goto unknown;
8433
8434             default:
8435               goto unknown;
8436           }
8437
8438         case 'p':
8439           if (name[1] == 'o')
8440           {
8441             switch (name[2])
8442             {
8443               case 'p':
8444                 {                                 /* pop        */
8445                   return -KEY_pop;
8446                 }
8447
8448               case 's':
8449                 {                                 /* pos        */
8450                   return KEY_pos;
8451                 }
8452
8453               default:
8454                 goto unknown;
8455             }
8456           }
8457
8458           goto unknown;
8459
8460         case 'r':
8461           if (name[1] == 'e' &&
8462               name[2] == 'f')
8463           {                                       /* ref        */
8464             return -KEY_ref;
8465           }
8466
8467           goto unknown;
8468
8469         case 's':
8470           switch (name[1])
8471           {
8472             case 'a':
8473               if (name[2] == 'y')
8474               {                                   /* say        */
8475                 return (all_keywords || FEATURE_IS_ENABLED("say") ? KEY_say : 0);
8476               }
8477
8478               goto unknown;
8479
8480             case 'i':
8481               if (name[2] == 'n')
8482               {                                   /* sin        */
8483                 return -KEY_sin;
8484               }
8485
8486               goto unknown;
8487
8488             case 'u':
8489               if (name[2] == 'b')
8490               {                                   /* sub        */
8491                 return KEY_sub;
8492               }
8493
8494               goto unknown;
8495
8496             default:
8497               goto unknown;
8498           }
8499
8500         case 't':
8501           if (name[1] == 'i' &&
8502               name[2] == 'e')
8503           {                                       /* tie        */
8504             return KEY_tie;
8505           }
8506
8507           goto unknown;
8508
8509         case 'u':
8510           if (name[1] == 's' &&
8511               name[2] == 'e')
8512           {                                       /* use        */
8513             return KEY_use;
8514           }
8515
8516           goto unknown;
8517
8518         case 'v':
8519           if (name[1] == 'e' &&
8520               name[2] == 'c')
8521           {                                       /* vec        */
8522             return -KEY_vec;
8523           }
8524
8525           goto unknown;
8526
8527         case 'x':
8528           if (name[1] == 'o' &&
8529               name[2] == 'r')
8530           {                                       /* xor        */
8531             return -KEY_xor;
8532           }
8533
8534           goto unknown;
8535
8536         default:
8537           goto unknown;
8538       }
8539
8540     case 4: /* 41 tokens of length 4 */
8541       switch (name[0])
8542       {
8543         case 'C':
8544           if (name[1] == 'O' &&
8545               name[2] == 'R' &&
8546               name[3] == 'E')
8547           {                                       /* CORE       */
8548             return -KEY_CORE;
8549           }
8550
8551           goto unknown;
8552
8553         case 'I':
8554           if (name[1] == 'N' &&
8555               name[2] == 'I' &&
8556               name[3] == 'T')
8557           {                                       /* INIT       */
8558             return KEY_INIT;
8559           }
8560
8561           goto unknown;
8562
8563         case 'b':
8564           if (name[1] == 'i' &&
8565               name[2] == 'n' &&
8566               name[3] == 'd')
8567           {                                       /* bind       */
8568             return -KEY_bind;
8569           }
8570
8571           goto unknown;
8572
8573         case 'c':
8574           if (name[1] == 'h' &&
8575               name[2] == 'o' &&
8576               name[3] == 'p')
8577           {                                       /* chop       */
8578             return -KEY_chop;
8579           }
8580
8581           goto unknown;
8582
8583         case 'd':
8584           if (name[1] == 'u' &&
8585               name[2] == 'm' &&
8586               name[3] == 'p')
8587           {                                       /* dump       */
8588             return -KEY_dump;
8589           }
8590
8591           goto unknown;
8592
8593         case 'e':
8594           switch (name[1])
8595           {
8596             case 'a':
8597               if (name[2] == 'c' &&
8598                   name[3] == 'h')
8599               {                                   /* each       */
8600                 return -KEY_each;
8601               }
8602
8603               goto unknown;
8604
8605             case 'l':
8606               if (name[2] == 's' &&
8607                   name[3] == 'e')
8608               {                                   /* else       */
8609                 return KEY_else;
8610               }
8611
8612               goto unknown;
8613
8614             case 'v':
8615               if (name[2] == 'a' &&
8616                   name[3] == 'l')
8617               {                                   /* eval       */
8618                 return KEY_eval;
8619               }
8620
8621               goto unknown;
8622
8623             case 'x':
8624               switch (name[2])
8625               {
8626                 case 'e':
8627                   if (name[3] == 'c')
8628                   {                               /* exec       */
8629                     return -KEY_exec;
8630                   }
8631
8632                   goto unknown;
8633
8634                 case 'i':
8635                   if (name[3] == 't')
8636                   {                               /* exit       */
8637                     return -KEY_exit;
8638                   }
8639
8640                   goto unknown;
8641
8642                 default:
8643                   goto unknown;
8644               }
8645
8646             default:
8647               goto unknown;
8648           }
8649
8650         case 'f':
8651           if (name[1] == 'o' &&
8652               name[2] == 'r' &&
8653               name[3] == 'k')
8654           {                                       /* fork       */
8655             return -KEY_fork;
8656           }
8657
8658           goto unknown;
8659
8660         case 'g':
8661           switch (name[1])
8662           {
8663             case 'e':
8664               if (name[2] == 't' &&
8665                   name[3] == 'c')
8666               {                                   /* getc       */
8667                 return -KEY_getc;
8668               }
8669
8670               goto unknown;
8671
8672             case 'l':
8673               if (name[2] == 'o' &&
8674                   name[3] == 'b')
8675               {                                   /* glob       */
8676                 return KEY_glob;
8677               }
8678
8679               goto unknown;
8680
8681             case 'o':
8682               if (name[2] == 't' &&
8683                   name[3] == 'o')
8684               {                                   /* goto       */
8685                 return KEY_goto;
8686               }
8687
8688               goto unknown;
8689
8690             case 'r':
8691               if (name[2] == 'e' &&
8692                   name[3] == 'p')
8693               {                                   /* grep       */
8694                 return KEY_grep;
8695               }
8696
8697               goto unknown;
8698
8699             default:
8700               goto unknown;
8701           }
8702
8703         case 'j':
8704           if (name[1] == 'o' &&
8705               name[2] == 'i' &&
8706               name[3] == 'n')
8707           {                                       /* join       */
8708             return -KEY_join;
8709           }
8710
8711           goto unknown;
8712
8713         case 'k':
8714           switch (name[1])
8715           {
8716             case 'e':
8717               if (name[2] == 'y' &&
8718                   name[3] == 's')
8719               {                                   /* keys       */
8720                 return -KEY_keys;
8721               }
8722
8723               goto unknown;
8724
8725             case 'i':
8726               if (name[2] == 'l' &&
8727                   name[3] == 'l')
8728               {                                   /* kill       */
8729                 return -KEY_kill;
8730               }
8731
8732               goto unknown;
8733
8734             default:
8735               goto unknown;
8736           }
8737
8738         case 'l':
8739           switch (name[1])
8740           {
8741             case 'a':
8742               if (name[2] == 's' &&
8743                   name[3] == 't')
8744               {                                   /* last       */
8745                 return KEY_last;
8746               }
8747
8748               goto unknown;
8749
8750             case 'i':
8751               if (name[2] == 'n' &&
8752                   name[3] == 'k')
8753               {                                   /* link       */
8754                 return -KEY_link;
8755               }
8756
8757               goto unknown;
8758
8759             case 'o':
8760               if (name[2] == 'c' &&
8761                   name[3] == 'k')
8762               {                                   /* lock       */
8763                 return -KEY_lock;
8764               }
8765
8766               goto unknown;
8767
8768             default:
8769               goto unknown;
8770           }
8771
8772         case 'n':
8773           if (name[1] == 'e' &&
8774               name[2] == 'x' &&
8775               name[3] == 't')
8776           {                                       /* next       */
8777             return KEY_next;
8778           }
8779
8780           goto unknown;
8781
8782         case 'o':
8783           if (name[1] == 'p' &&
8784               name[2] == 'e' &&
8785               name[3] == 'n')
8786           {                                       /* open       */
8787             return -KEY_open;
8788           }
8789
8790           goto unknown;
8791
8792         case 'p':
8793           switch (name[1])
8794           {
8795             case 'a':
8796               if (name[2] == 'c' &&
8797                   name[3] == 'k')
8798               {                                   /* pack       */
8799                 return -KEY_pack;
8800               }
8801
8802               goto unknown;
8803
8804             case 'i':
8805               if (name[2] == 'p' &&
8806                   name[3] == 'e')
8807               {                                   /* pipe       */
8808                 return -KEY_pipe;
8809               }
8810
8811               goto unknown;
8812
8813             case 'u':
8814               if (name[2] == 's' &&
8815                   name[3] == 'h')
8816               {                                   /* push       */
8817                 return -KEY_push;
8818               }
8819
8820               goto unknown;
8821
8822             default:
8823               goto unknown;
8824           }
8825
8826         case 'r':
8827           switch (name[1])
8828           {
8829             case 'a':
8830               if (name[2] == 'n' &&
8831                   name[3] == 'd')
8832               {                                   /* rand       */
8833                 return -KEY_rand;
8834               }
8835
8836               goto unknown;
8837
8838             case 'e':
8839               switch (name[2])
8840               {
8841                 case 'a':
8842                   if (name[3] == 'd')
8843                   {                               /* read       */
8844                     return -KEY_read;
8845                   }
8846
8847                   goto unknown;
8848
8849                 case 'c':
8850                   if (name[3] == 'v')
8851                   {                               /* recv       */
8852                     return -KEY_recv;
8853                   }
8854
8855                   goto unknown;
8856
8857                 case 'd':
8858                   if (name[3] == 'o')
8859                   {                               /* redo       */
8860                     return KEY_redo;
8861                   }
8862
8863                   goto unknown;
8864
8865                 default:
8866                   goto unknown;
8867               }
8868
8869             default:
8870               goto unknown;
8871           }
8872
8873         case 's':
8874           switch (name[1])
8875           {
8876             case 'e':
8877               switch (name[2])
8878               {
8879                 case 'e':
8880                   if (name[3] == 'k')
8881                   {                               /* seek       */
8882                     return -KEY_seek;
8883                   }
8884
8885                   goto unknown;
8886
8887                 case 'n':
8888                   if (name[3] == 'd')
8889                   {                               /* send       */
8890                     return -KEY_send;
8891                   }
8892
8893                   goto unknown;
8894
8895                 default:
8896                   goto unknown;
8897               }
8898
8899             case 'o':
8900               if (name[2] == 'r' &&
8901                   name[3] == 't')
8902               {                                   /* sort       */
8903                 return KEY_sort;
8904               }
8905
8906               goto unknown;
8907
8908             case 'q':
8909               if (name[2] == 'r' &&
8910                   name[3] == 't')
8911               {                                   /* sqrt       */
8912                 return -KEY_sqrt;
8913               }
8914
8915               goto unknown;
8916
8917             case 't':
8918               if (name[2] == 'a' &&
8919                   name[3] == 't')
8920               {                                   /* stat       */
8921                 return -KEY_stat;
8922               }
8923
8924               goto unknown;
8925
8926             default:
8927               goto unknown;
8928           }
8929
8930         case 't':
8931           switch (name[1])
8932           {
8933             case 'e':
8934               if (name[2] == 'l' &&
8935                   name[3] == 'l')
8936               {                                   /* tell       */
8937                 return -KEY_tell;
8938               }
8939
8940               goto unknown;
8941
8942             case 'i':
8943               switch (name[2])
8944               {
8945                 case 'e':
8946                   if (name[3] == 'd')
8947                   {                               /* tied       */
8948                     return KEY_tied;
8949                   }
8950
8951                   goto unknown;
8952
8953                 case 'm':
8954                   if (name[3] == 'e')
8955                   {                               /* time       */
8956                     return -KEY_time;
8957                   }
8958
8959                   goto unknown;
8960
8961                 default:
8962                   goto unknown;
8963               }
8964
8965             default:
8966               goto unknown;
8967           }
8968
8969         case 'w':
8970           switch (name[1])
8971           {
8972             case 'a':
8973               switch (name[2])
8974               {
8975                 case 'i':
8976                   if (name[3] == 't')
8977                   {                               /* wait       */
8978                     return -KEY_wait;
8979                   }
8980
8981                   goto unknown;
8982
8983                 case 'r':
8984                   if (name[3] == 'n')
8985                   {                               /* warn       */
8986                     return -KEY_warn;
8987                   }
8988
8989                   goto unknown;
8990
8991                 default:
8992                   goto unknown;
8993               }
8994
8995             case 'h':
8996               if (name[2] == 'e' &&
8997                   name[3] == 'n')
8998               {                                   /* when       */
8999                 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_when : 0);
9000               }
9001
9002               goto unknown;
9003
9004             default:
9005               goto unknown;
9006           }
9007
9008         default:
9009           goto unknown;
9010       }
9011
9012     case 5: /* 39 tokens of length 5 */
9013       switch (name[0])
9014       {
9015         case 'B':
9016           if (name[1] == 'E' &&
9017               name[2] == 'G' &&
9018               name[3] == 'I' &&
9019               name[4] == 'N')
9020           {                                       /* BEGIN      */
9021             return KEY_BEGIN;
9022           }
9023
9024           goto unknown;
9025
9026         case 'C':
9027           if (name[1] == 'H' &&
9028               name[2] == 'E' &&
9029               name[3] == 'C' &&
9030               name[4] == 'K')
9031           {                                       /* CHECK      */
9032             return KEY_CHECK;
9033           }
9034
9035           goto unknown;
9036
9037         case 'a':
9038           switch (name[1])
9039           {
9040             case 'l':
9041               if (name[2] == 'a' &&
9042                   name[3] == 'r' &&
9043                   name[4] == 'm')
9044               {                                   /* alarm      */
9045                 return -KEY_alarm;
9046               }
9047
9048               goto unknown;
9049
9050             case 't':
9051               if (name[2] == 'a' &&
9052                   name[3] == 'n' &&
9053                   name[4] == '2')
9054               {                                   /* atan2      */
9055                 return -KEY_atan2;
9056               }
9057
9058               goto unknown;
9059
9060             default:
9061               goto unknown;
9062           }
9063
9064         case 'b':
9065           switch (name[1])
9066           {
9067             case 'l':
9068               if (name[2] == 'e' &&
9069                   name[3] == 's' &&
9070                   name[4] == 's')
9071               {                                   /* bless      */
9072                 return -KEY_bless;
9073               }
9074
9075               goto unknown;
9076
9077             case 'r':
9078               if (name[2] == 'e' &&
9079                   name[3] == 'a' &&
9080                   name[4] == 'k')
9081               {                                   /* break      */
9082                 return (all_keywords || FEATURE_IS_ENABLED("switch") ? -KEY_break : 0);
9083               }
9084
9085               goto unknown;
9086
9087             default:
9088               goto unknown;
9089           }
9090
9091         case 'c':
9092           switch (name[1])
9093           {
9094             case 'h':
9095               switch (name[2])
9096               {
9097                 case 'd':
9098                   if (name[3] == 'i' &&
9099                       name[4] == 'r')
9100                   {                               /* chdir      */
9101                     return -KEY_chdir;
9102                   }
9103
9104                   goto unknown;
9105
9106                 case 'm':
9107                   if (name[3] == 'o' &&
9108                       name[4] == 'd')
9109                   {                               /* chmod      */
9110                     return -KEY_chmod;
9111                   }
9112
9113                   goto unknown;
9114
9115                 case 'o':
9116                   switch (name[3])
9117                   {
9118                     case 'm':
9119                       if (name[4] == 'p')
9120                       {                           /* chomp      */
9121                         return -KEY_chomp;
9122                       }
9123
9124                       goto unknown;
9125
9126                     case 'w':
9127                       if (name[4] == 'n')
9128                       {                           /* chown      */
9129                         return -KEY_chown;
9130                       }
9131
9132                       goto unknown;
9133
9134                     default:
9135                       goto unknown;
9136                   }
9137
9138                 default:
9139                   goto unknown;
9140               }
9141
9142             case 'l':
9143               if (name[2] == 'o' &&
9144                   name[3] == 's' &&
9145                   name[4] == 'e')
9146               {                                   /* close      */
9147                 return -KEY_close;
9148               }
9149
9150               goto unknown;
9151
9152             case 'r':
9153               if (name[2] == 'y' &&
9154                   name[3] == 'p' &&
9155                   name[4] == 't')
9156               {                                   /* crypt      */
9157                 return -KEY_crypt;
9158               }
9159
9160               goto unknown;
9161
9162             default:
9163               goto unknown;
9164           }
9165
9166         case 'e':
9167           if (name[1] == 'l' &&
9168               name[2] == 's' &&
9169               name[3] == 'i' &&
9170               name[4] == 'f')
9171           {                                       /* elsif      */
9172             return KEY_elsif;
9173           }
9174
9175           goto unknown;
9176
9177         case 'f':
9178           switch (name[1])
9179           {
9180             case 'c':
9181               if (name[2] == 'n' &&
9182                   name[3] == 't' &&
9183                   name[4] == 'l')
9184               {                                   /* fcntl      */
9185                 return -KEY_fcntl;
9186               }
9187
9188               goto unknown;
9189
9190             case 'l':
9191               if (name[2] == 'o' &&
9192                   name[3] == 'c' &&
9193                   name[4] == 'k')
9194               {                                   /* flock      */
9195                 return -KEY_flock;
9196               }
9197
9198               goto unknown;
9199
9200             default:
9201               goto unknown;
9202           }
9203
9204         case 'g':
9205           if (name[1] == 'i' &&
9206               name[2] == 'v' &&
9207               name[3] == 'e' &&
9208               name[4] == 'n')
9209           {                                       /* given      */
9210             return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_given : 0);
9211           }
9212
9213           goto unknown;
9214
9215         case 'i':
9216           switch (name[1])
9217           {
9218             case 'n':
9219               if (name[2] == 'd' &&
9220                   name[3] == 'e' &&
9221                   name[4] == 'x')
9222               {                                   /* index      */
9223                 return -KEY_index;
9224               }
9225
9226               goto unknown;
9227
9228             case 'o':
9229               if (name[2] == 'c' &&
9230                   name[3] == 't' &&
9231                   name[4] == 'l')
9232               {                                   /* ioctl      */
9233                 return -KEY_ioctl;
9234               }
9235
9236               goto unknown;
9237
9238             default:
9239               goto unknown;
9240           }
9241
9242         case 'l':
9243           switch (name[1])
9244           {
9245             case 'o':
9246               if (name[2] == 'c' &&
9247                   name[3] == 'a' &&
9248                   name[4] == 'l')
9249               {                                   /* local      */
9250                 return KEY_local;
9251               }
9252
9253               goto unknown;
9254
9255             case 's':
9256               if (name[2] == 't' &&
9257                   name[3] == 'a' &&
9258                   name[4] == 't')
9259               {                                   /* lstat      */
9260                 return -KEY_lstat;
9261               }
9262
9263               goto unknown;
9264
9265             default:
9266               goto unknown;
9267           }
9268
9269         case 'm':
9270           if (name[1] == 'k' &&
9271               name[2] == 'd' &&
9272               name[3] == 'i' &&
9273               name[4] == 'r')
9274           {                                       /* mkdir      */
9275             return -KEY_mkdir;
9276           }
9277
9278           goto unknown;
9279
9280         case 'p':
9281           if (name[1] == 'r' &&
9282               name[2] == 'i' &&
9283               name[3] == 'n' &&
9284               name[4] == 't')
9285           {                                       /* print      */
9286             return KEY_print;
9287           }
9288
9289           goto unknown;
9290
9291         case 'r':
9292           switch (name[1])
9293           {
9294             case 'e':
9295               if (name[2] == 's' &&
9296                   name[3] == 'e' &&
9297                   name[4] == 't')
9298               {                                   /* reset      */
9299                 return -KEY_reset;
9300               }
9301
9302               goto unknown;
9303
9304             case 'm':
9305               if (name[2] == 'd' &&
9306                   name[3] == 'i' &&
9307                   name[4] == 'r')
9308               {                                   /* rmdir      */
9309                 return -KEY_rmdir;
9310               }
9311
9312               goto unknown;
9313
9314             default:
9315               goto unknown;
9316           }
9317
9318         case 's':
9319           switch (name[1])
9320           {
9321             case 'e':
9322               if (name[2] == 'm' &&
9323                   name[3] == 'o' &&
9324                   name[4] == 'p')
9325               {                                   /* semop      */
9326                 return -KEY_semop;
9327               }
9328
9329               goto unknown;
9330
9331             case 'h':
9332               if (name[2] == 'i' &&
9333                   name[3] == 'f' &&
9334                   name[4] == 't')
9335               {                                   /* shift      */
9336                 return -KEY_shift;
9337               }
9338
9339               goto unknown;
9340
9341             case 'l':
9342               if (name[2] == 'e' &&
9343                   name[3] == 'e' &&
9344                   name[4] == 'p')
9345               {                                   /* sleep      */
9346                 return -KEY_sleep;
9347               }
9348
9349               goto unknown;
9350
9351             case 'p':
9352               if (name[2] == 'l' &&
9353                   name[3] == 'i' &&
9354                   name[4] == 't')
9355               {                                   /* split      */
9356                 return KEY_split;
9357               }
9358
9359               goto unknown;
9360
9361             case 'r':
9362               if (name[2] == 'a' &&
9363                   name[3] == 'n' &&
9364                   name[4] == 'd')
9365               {                                   /* srand      */
9366                 return -KEY_srand;
9367               }
9368
9369               goto unknown;
9370
9371             case 't':
9372               switch (name[2])
9373               {
9374                 case 'a':
9375                   if (name[3] == 't' &&
9376                       name[4] == 'e')
9377                   {                               /* state      */
9378                     return (all_keywords || FEATURE_IS_ENABLED("state") ? KEY_state : 0);
9379                   }
9380
9381                   goto unknown;
9382
9383                 case 'u':
9384                   if (name[3] == 'd' &&
9385                       name[4] == 'y')
9386                   {                               /* study      */
9387                     return KEY_study;
9388                   }
9389
9390                   goto unknown;
9391
9392                 default:
9393                   goto unknown;
9394               }
9395
9396             default:
9397               goto unknown;
9398           }
9399
9400         case 't':
9401           if (name[1] == 'i' &&
9402               name[2] == 'm' &&
9403               name[3] == 'e' &&
9404               name[4] == 's')
9405           {                                       /* times      */
9406             return -KEY_times;
9407           }
9408
9409           goto unknown;
9410
9411         case 'u':
9412           switch (name[1])
9413           {
9414             case 'm':
9415               if (name[2] == 'a' &&
9416                   name[3] == 's' &&
9417                   name[4] == 'k')
9418               {                                   /* umask      */
9419                 return -KEY_umask;
9420               }
9421
9422               goto unknown;
9423
9424             case 'n':
9425               switch (name[2])
9426               {
9427                 case 'd':
9428                   if (name[3] == 'e' &&
9429                       name[4] == 'f')
9430                   {                               /* undef      */
9431                     return KEY_undef;
9432                   }
9433
9434                   goto unknown;
9435
9436                 case 't':
9437                   if (name[3] == 'i')
9438                   {
9439                     switch (name[4])
9440                     {
9441                       case 'e':
9442                         {                         /* untie      */
9443                           return KEY_untie;
9444                         }
9445
9446                       case 'l':
9447                         {                         /* until      */
9448                           return KEY_until;
9449                         }
9450
9451                       default:
9452                         goto unknown;
9453                     }
9454                   }
9455
9456                   goto unknown;
9457
9458                 default:
9459                   goto unknown;
9460               }
9461
9462             case 't':
9463               if (name[2] == 'i' &&
9464                   name[3] == 'm' &&
9465                   name[4] == 'e')
9466               {                                   /* utime      */
9467                 return -KEY_utime;
9468               }
9469
9470               goto unknown;
9471
9472             default:
9473               goto unknown;
9474           }
9475
9476         case 'w':
9477           switch (name[1])
9478           {
9479             case 'h':
9480               if (name[2] == 'i' &&
9481                   name[3] == 'l' &&
9482                   name[4] == 'e')
9483               {                                   /* while      */
9484                 return KEY_while;
9485               }
9486
9487               goto unknown;
9488
9489             case 'r':
9490               if (name[2] == 'i' &&
9491                   name[3] == 't' &&
9492                   name[4] == 'e')
9493               {                                   /* write      */
9494                 return -KEY_write;
9495               }
9496
9497               goto unknown;
9498
9499             default:
9500               goto unknown;
9501           }
9502
9503         default:
9504           goto unknown;
9505       }
9506
9507     case 6: /* 33 tokens of length 6 */
9508       switch (name[0])
9509       {
9510         case 'a':
9511           if (name[1] == 'c' &&
9512               name[2] == 'c' &&
9513               name[3] == 'e' &&
9514               name[4] == 'p' &&
9515               name[5] == 't')
9516           {                                       /* accept     */
9517             return -KEY_accept;
9518           }
9519
9520           goto unknown;
9521
9522         case 'c':
9523           switch (name[1])
9524           {
9525             case 'a':
9526               if (name[2] == 'l' &&
9527                   name[3] == 'l' &&
9528                   name[4] == 'e' &&
9529                   name[5] == 'r')
9530               {                                   /* caller     */
9531                 return -KEY_caller;
9532               }
9533
9534               goto unknown;
9535
9536             case 'h':
9537               if (name[2] == 'r' &&
9538                   name[3] == 'o' &&
9539                   name[4] == 'o' &&
9540                   name[5] == 't')
9541               {                                   /* chroot     */
9542                 return -KEY_chroot;
9543               }
9544
9545               goto unknown;
9546
9547             default:
9548               goto unknown;
9549           }
9550
9551         case 'd':
9552           if (name[1] == 'e' &&
9553               name[2] == 'l' &&
9554               name[3] == 'e' &&
9555               name[4] == 't' &&
9556               name[5] == 'e')
9557           {                                       /* delete     */
9558             return KEY_delete;
9559           }
9560
9561           goto unknown;
9562
9563         case 'e':
9564           switch (name[1])
9565           {
9566             case 'l':
9567               if (name[2] == 's' &&
9568                   name[3] == 'e' &&
9569                   name[4] == 'i' &&
9570                   name[5] == 'f')
9571               {                                   /* elseif     */
9572                   Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif");
9573               }
9574
9575               goto unknown;
9576
9577             case 'x':
9578               if (name[2] == 'i' &&
9579                   name[3] == 's' &&
9580                   name[4] == 't' &&
9581                   name[5] == 's')
9582               {                                   /* exists     */
9583                 return KEY_exists;
9584               }
9585
9586               goto unknown;
9587
9588             default:
9589               goto unknown;
9590           }
9591
9592         case 'f':
9593           switch (name[1])
9594           {
9595             case 'i':
9596               if (name[2] == 'l' &&
9597                   name[3] == 'e' &&
9598                   name[4] == 'n' &&
9599                   name[5] == 'o')
9600               {                                   /* fileno     */
9601                 return -KEY_fileno;
9602               }
9603
9604               goto unknown;
9605
9606             case 'o':
9607               if (name[2] == 'r' &&
9608                   name[3] == 'm' &&
9609                   name[4] == 'a' &&
9610                   name[5] == 't')
9611               {                                   /* format     */
9612                 return KEY_format;
9613               }
9614
9615               goto unknown;
9616
9617             default:
9618               goto unknown;
9619           }
9620
9621         case 'g':
9622           if (name[1] == 'm' &&
9623               name[2] == 't' &&
9624               name[3] == 'i' &&
9625               name[4] == 'm' &&
9626               name[5] == 'e')
9627           {                                       /* gmtime     */
9628             return -KEY_gmtime;
9629           }
9630
9631           goto unknown;
9632
9633         case 'l':
9634           switch (name[1])
9635           {
9636             case 'e':
9637               if (name[2] == 'n' &&
9638                   name[3] == 'g' &&
9639                   name[4] == 't' &&
9640                   name[5] == 'h')
9641               {                                   /* length     */
9642                 return -KEY_length;
9643               }
9644
9645               goto unknown;
9646
9647             case 'i':
9648               if (name[2] == 's' &&
9649                   name[3] == 't' &&
9650                   name[4] == 'e' &&
9651                   name[5] == 'n')
9652               {                                   /* listen     */
9653                 return -KEY_listen;
9654               }
9655
9656               goto unknown;
9657
9658             default:
9659               goto unknown;
9660           }
9661
9662         case 'm':
9663           if (name[1] == 's' &&
9664               name[2] == 'g')
9665           {
9666             switch (name[3])
9667             {
9668               case 'c':
9669                 if (name[4] == 't' &&
9670                     name[5] == 'l')
9671                 {                                 /* msgctl     */
9672                   return -KEY_msgctl;
9673                 }
9674
9675                 goto unknown;
9676
9677               case 'g':
9678                 if (name[4] == 'e' &&
9679                     name[5] == 't')
9680                 {                                 /* msgget     */
9681                   return -KEY_msgget;
9682                 }
9683
9684                 goto unknown;
9685
9686               case 'r':
9687                 if (name[4] == 'c' &&
9688                     name[5] == 'v')
9689                 {                                 /* msgrcv     */
9690                   return -KEY_msgrcv;
9691                 }
9692
9693                 goto unknown;
9694
9695               case 's':
9696                 if (name[4] == 'n' &&
9697                     name[5] == 'd')
9698                 {                                 /* msgsnd     */
9699                   return -KEY_msgsnd;
9700                 }
9701
9702                 goto unknown;
9703
9704               default:
9705                 goto unknown;
9706             }
9707           }
9708
9709           goto unknown;
9710
9711         case 'p':
9712           if (name[1] == 'r' &&
9713               name[2] == 'i' &&
9714               name[3] == 'n' &&
9715               name[4] == 't' &&
9716               name[5] == 'f')
9717           {                                       /* printf     */
9718             return KEY_printf;
9719           }
9720
9721           goto unknown;
9722
9723         case 'r':
9724           switch (name[1])
9725           {
9726             case 'e':
9727               switch (name[2])
9728               {
9729                 case 'n':
9730                   if (name[3] == 'a' &&
9731                       name[4] == 'm' &&
9732                       name[5] == 'e')
9733                   {                               /* rename     */
9734                     return -KEY_rename;
9735                   }
9736
9737                   goto unknown;
9738
9739                 case 't':
9740                   if (name[3] == 'u' &&
9741                       name[4] == 'r' &&
9742                       name[5] == 'n')
9743                   {                               /* return     */
9744                     return KEY_return;
9745                   }
9746
9747                   goto unknown;
9748
9749                 default:
9750                   goto unknown;
9751               }
9752
9753             case 'i':
9754               if (name[2] == 'n' &&
9755                   name[3] == 'd' &&
9756                   name[4] == 'e' &&
9757                   name[5] == 'x')
9758               {                                   /* rindex     */
9759                 return -KEY_rindex;
9760               }
9761
9762               goto unknown;
9763
9764             default:
9765               goto unknown;
9766           }
9767
9768         case 's':
9769           switch (name[1])
9770           {
9771             case 'c':
9772               if (name[2] == 'a' &&
9773                   name[3] == 'l' &&
9774                   name[4] == 'a' &&
9775                   name[5] == 'r')
9776               {                                   /* scalar     */
9777                 return KEY_scalar;
9778               }
9779
9780               goto unknown;
9781
9782             case 'e':
9783               switch (name[2])
9784               {
9785                 case 'l':
9786                   if (name[3] == 'e' &&
9787                       name[4] == 'c' &&
9788                       name[5] == 't')
9789                   {                               /* select     */
9790                     return -KEY_select;
9791                   }
9792
9793                   goto unknown;
9794
9795                 case 'm':
9796                   switch (name[3])
9797                   {
9798                     case 'c':
9799                       if (name[4] == 't' &&
9800                           name[5] == 'l')
9801                       {                           /* semctl     */
9802                         return -KEY_semctl;
9803                       }
9804
9805                       goto unknown;
9806
9807                     case 'g':
9808                       if (name[4] == 'e' &&
9809                           name[5] == 't')
9810                       {                           /* semget     */
9811                         return -KEY_semget;
9812                       }
9813
9814                       goto unknown;
9815
9816                     default:
9817                       goto unknown;
9818                   }
9819
9820                 default:
9821                   goto unknown;
9822               }
9823
9824             case 'h':
9825               if (name[2] == 'm')
9826               {
9827                 switch (name[3])
9828                 {
9829                   case 'c':
9830                     if (name[4] == 't' &&
9831                         name[5] == 'l')
9832                     {                             /* shmctl     */
9833                       return -KEY_shmctl;
9834                     }
9835
9836                     goto unknown;
9837
9838                   case 'g':
9839                     if (name[4] == 'e' &&
9840                         name[5] == 't')
9841                     {                             /* shmget     */
9842                       return -KEY_shmget;
9843                     }
9844
9845                     goto unknown;
9846
9847                   default:
9848                     goto unknown;
9849                 }
9850               }
9851
9852               goto unknown;
9853
9854             case 'o':
9855               if (name[2] == 'c' &&
9856                   name[3] == 'k' &&
9857                   name[4] == 'e' &&
9858                   name[5] == 't')
9859               {                                   /* socket     */
9860                 return -KEY_socket;
9861               }
9862
9863               goto unknown;
9864
9865             case 'p':
9866               if (name[2] == 'l' &&
9867                   name[3] == 'i' &&
9868                   name[4] == 'c' &&
9869                   name[5] == 'e')
9870               {                                   /* splice     */
9871                 return -KEY_splice;
9872               }
9873
9874               goto unknown;
9875
9876             case 'u':
9877               if (name[2] == 'b' &&
9878                   name[3] == 's' &&
9879                   name[4] == 't' &&
9880                   name[5] == 'r')
9881               {                                   /* substr     */
9882                 return -KEY_substr;
9883               }
9884
9885               goto unknown;
9886
9887             case 'y':
9888               if (name[2] == 's' &&
9889                   name[3] == 't' &&
9890                   name[4] == 'e' &&
9891                   name[5] == 'm')
9892               {                                   /* system     */
9893                 return -KEY_system;
9894               }
9895
9896               goto unknown;
9897
9898             default:
9899               goto unknown;
9900           }
9901
9902         case 'u':
9903           if (name[1] == 'n')
9904           {
9905             switch (name[2])
9906             {
9907               case 'l':
9908                 switch (name[3])
9909                 {
9910                   case 'e':
9911                     if (name[4] == 's' &&
9912                         name[5] == 's')
9913                     {                             /* unless     */
9914                       return KEY_unless;
9915                     }
9916
9917                     goto unknown;
9918
9919                   case 'i':
9920                     if (name[4] == 'n' &&
9921                         name[5] == 'k')
9922                     {                             /* unlink     */
9923                       return -KEY_unlink;
9924                     }
9925
9926                     goto unknown;
9927
9928                   default:
9929                     goto unknown;
9930                 }
9931
9932               case 'p':
9933                 if (name[3] == 'a' &&
9934                     name[4] == 'c' &&
9935                     name[5] == 'k')
9936                 {                                 /* unpack     */
9937                   return -KEY_unpack;
9938                 }
9939
9940                 goto unknown;
9941
9942               default:
9943                 goto unknown;
9944             }
9945           }
9946
9947           goto unknown;
9948
9949         case 'v':
9950           if (name[1] == 'a' &&
9951               name[2] == 'l' &&
9952               name[3] == 'u' &&
9953               name[4] == 'e' &&
9954               name[5] == 's')
9955           {                                       /* values     */
9956             return -KEY_values;
9957           }
9958
9959           goto unknown;
9960
9961         default:
9962           goto unknown;
9963       }
9964
9965     case 7: /* 29 tokens of length 7 */
9966       switch (name[0])
9967       {
9968         case 'D':
9969           if (name[1] == 'E' &&
9970               name[2] == 'S' &&
9971               name[3] == 'T' &&
9972               name[4] == 'R' &&
9973               name[5] == 'O' &&
9974               name[6] == 'Y')
9975           {                                       /* DESTROY    */
9976             return KEY_DESTROY;
9977           }
9978
9979           goto unknown;
9980
9981         case '_':
9982           if (name[1] == '_' &&
9983               name[2] == 'E' &&
9984               name[3] == 'N' &&
9985               name[4] == 'D' &&
9986               name[5] == '_' &&
9987               name[6] == '_')
9988           {                                       /* __END__    */
9989             return KEY___END__;
9990           }
9991
9992           goto unknown;
9993
9994         case 'b':
9995           if (name[1] == 'i' &&
9996               name[2] == 'n' &&
9997               name[3] == 'm' &&
9998               name[4] == 'o' &&
9999               name[5] == 'd' &&
10000               name[6] == 'e')
10001           {                                       /* binmode    */
10002             return -KEY_binmode;
10003           }
10004
10005           goto unknown;
10006
10007         case 'c':
10008           if (name[1] == 'o' &&
10009               name[2] == 'n' &&
10010               name[3] == 'n' &&
10011               name[4] == 'e' &&
10012               name[5] == 'c' &&
10013               name[6] == 't')
10014           {                                       /* connect    */
10015             return -KEY_connect;
10016           }
10017
10018           goto unknown;
10019
10020         case 'd':
10021           switch (name[1])
10022           {
10023             case 'b':
10024               if (name[2] == 'm' &&
10025                   name[3] == 'o' &&
10026                   name[4] == 'p' &&
10027                   name[5] == 'e' &&
10028                   name[6] == 'n')
10029               {                                   /* dbmopen    */
10030                 return -KEY_dbmopen;
10031               }
10032
10033               goto unknown;
10034
10035             case 'e':
10036               if (name[2] == 'f')
10037               {
10038                 switch (name[3])
10039                 {
10040                   case 'a':
10041                     if (name[4] == 'u' &&
10042                         name[5] == 'l' &&
10043                         name[6] == 't')
10044                     {                             /* default    */
10045                       return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_default : 0);
10046                     }
10047
10048                     goto unknown;
10049
10050                   case 'i':
10051                     if (name[4] == 'n' &&
10052                         name[5] == 'e' &&
10053                         name[6] == 'd')
10054                     {                             /* defined    */
10055                       return KEY_defined;
10056                     }
10057
10058                     goto unknown;
10059
10060                   default:
10061                     goto unknown;
10062                 }
10063               }
10064
10065               goto unknown;
10066
10067             default:
10068               goto unknown;
10069           }
10070
10071         case 'f':
10072           if (name[1] == 'o' &&
10073               name[2] == 'r' &&
10074               name[3] == 'e' &&
10075               name[4] == 'a' &&
10076               name[5] == 'c' &&
10077               name[6] == 'h')
10078           {                                       /* foreach    */
10079             return KEY_foreach;
10080           }
10081
10082           goto unknown;
10083
10084         case 'g':
10085           if (name[1] == 'e' &&
10086               name[2] == 't' &&
10087               name[3] == 'p')
10088           {
10089             switch (name[4])
10090             {
10091               case 'g':
10092                 if (name[5] == 'r' &&
10093                     name[6] == 'p')
10094                 {                                 /* getpgrp    */
10095                   return -KEY_getpgrp;
10096                 }
10097
10098                 goto unknown;
10099
10100               case 'p':
10101                 if (name[5] == 'i' &&
10102                     name[6] == 'd')
10103                 {                                 /* getppid    */
10104                   return -KEY_getppid;
10105                 }
10106
10107                 goto unknown;
10108
10109               default:
10110                 goto unknown;
10111             }
10112           }
10113
10114           goto unknown;
10115
10116         case 'l':
10117           if (name[1] == 'c' &&
10118               name[2] == 'f' &&
10119               name[3] == 'i' &&
10120               name[4] == 'r' &&
10121               name[5] == 's' &&
10122               name[6] == 't')
10123           {                                       /* lcfirst    */
10124             return -KEY_lcfirst;
10125           }
10126
10127           goto unknown;
10128
10129         case 'o':
10130           if (name[1] == 'p' &&
10131               name[2] == 'e' &&
10132               name[3] == 'n' &&
10133               name[4] == 'd' &&
10134               name[5] == 'i' &&
10135               name[6] == 'r')
10136           {                                       /* opendir    */
10137             return -KEY_opendir;
10138           }
10139
10140           goto unknown;
10141
10142         case 'p':
10143           if (name[1] == 'a' &&
10144               name[2] == 'c' &&
10145               name[3] == 'k' &&
10146               name[4] == 'a' &&
10147               name[5] == 'g' &&
10148               name[6] == 'e')
10149           {                                       /* package    */
10150             return KEY_package;
10151           }
10152
10153           goto unknown;
10154
10155         case 'r':
10156           if (name[1] == 'e')
10157           {
10158             switch (name[2])
10159             {
10160               case 'a':
10161                 if (name[3] == 'd' &&
10162                     name[4] == 'd' &&
10163                     name[5] == 'i' &&
10164                     name[6] == 'r')
10165                 {                                 /* readdir    */
10166                   return -KEY_readdir;
10167                 }
10168
10169                 goto unknown;
10170
10171               case 'q':
10172                 if (name[3] == 'u' &&
10173                     name[4] == 'i' &&
10174                     name[5] == 'r' &&
10175                     name[6] == 'e')
10176                 {                                 /* require    */
10177                   return KEY_require;
10178                 }
10179
10180                 goto unknown;
10181
10182               case 'v':
10183                 if (name[3] == 'e' &&
10184                     name[4] == 'r' &&
10185                     name[5] == 's' &&
10186                     name[6] == 'e')
10187                 {                                 /* reverse    */
10188                   return -KEY_reverse;
10189                 }
10190
10191                 goto unknown;
10192
10193               default:
10194                 goto unknown;
10195             }
10196           }
10197
10198           goto unknown;
10199
10200         case 's':
10201           switch (name[1])
10202           {
10203             case 'e':
10204               switch (name[2])
10205               {
10206                 case 'e':
10207                   if (name[3] == 'k' &&
10208                       name[4] == 'd' &&
10209                       name[5] == 'i' &&
10210                       name[6] == 'r')
10211                   {                               /* seekdir    */
10212                     return -KEY_seekdir;
10213                   }
10214
10215                   goto unknown;
10216
10217                 case 't':
10218                   if (name[3] == 'p' &&
10219                       name[4] == 'g' &&
10220                       name[5] == 'r' &&
10221                       name[6] == 'p')
10222                   {                               /* setpgrp    */
10223                     return -KEY_setpgrp;
10224                   }
10225
10226                   goto unknown;
10227
10228                 default:
10229                   goto unknown;
10230               }
10231
10232             case 'h':
10233               if (name[2] == 'm' &&
10234                   name[3] == 'r' &&
10235                   name[4] == 'e' &&
10236                   name[5] == 'a' &&
10237                   name[6] == 'd')
10238               {                                   /* shmread    */
10239                 return -KEY_shmread;
10240               }
10241
10242               goto unknown;
10243
10244             case 'p':
10245               if (name[2] == 'r' &&
10246                   name[3] == 'i' &&
10247                   name[4] == 'n' &&
10248                   name[5] == 't' &&
10249                   name[6] == 'f')
10250               {                                   /* sprintf    */
10251                 return -KEY_sprintf;
10252               }
10253
10254               goto unknown;
10255
10256             case 'y':
10257               switch (name[2])
10258               {
10259                 case 'm':
10260                   if (name[3] == 'l' &&
10261                       name[4] == 'i' &&
10262                       name[5] == 'n' &&
10263                       name[6] == 'k')
10264                   {                               /* symlink    */
10265                     return -KEY_symlink;
10266                   }
10267
10268                   goto unknown;
10269
10270                 case 's':
10271                   switch (name[3])
10272                   {
10273                     case 'c':
10274                       if (name[4] == 'a' &&
10275                           name[5] == 'l' &&
10276                           name[6] == 'l')
10277                       {                           /* syscall    */
10278                         return -KEY_syscall;
10279                       }
10280
10281                       goto unknown;
10282
10283                     case 'o':
10284                       if (name[4] == 'p' &&
10285                           name[5] == 'e' &&
10286                           name[6] == 'n')
10287                       {                           /* sysopen    */
10288                         return -KEY_sysopen;
10289                       }
10290
10291                       goto unknown;
10292
10293                     case 'r':
10294                       if (name[4] == 'e' &&
10295                           name[5] == 'a' &&
10296                           name[6] == 'd')
10297                       {                           /* sysread    */
10298                         return -KEY_sysread;
10299                       }
10300
10301                       goto unknown;
10302
10303                     case 's':
10304                       if (name[4] == 'e' &&
10305                           name[5] == 'e' &&
10306                           name[6] == 'k')
10307                       {                           /* sysseek    */
10308                         return -KEY_sysseek;
10309                       }
10310
10311                       goto unknown;
10312
10313                     default:
10314                       goto unknown;
10315                   }
10316
10317                 default:
10318                   goto unknown;
10319               }
10320
10321             default:
10322               goto unknown;
10323           }
10324
10325         case 't':
10326           if (name[1] == 'e' &&
10327               name[2] == 'l' &&
10328               name[3] == 'l' &&
10329               name[4] == 'd' &&
10330               name[5] == 'i' &&
10331               name[6] == 'r')
10332           {                                       /* telldir    */
10333             return -KEY_telldir;
10334           }
10335
10336           goto unknown;
10337
10338         case 'u':
10339           switch (name[1])
10340           {
10341             case 'c':
10342               if (name[2] == 'f' &&
10343                   name[3] == 'i' &&
10344                   name[4] == 'r' &&
10345                   name[5] == 's' &&
10346                   name[6] == 't')
10347               {                                   /* ucfirst    */
10348                 return -KEY_ucfirst;
10349               }
10350
10351               goto unknown;
10352
10353             case 'n':
10354               if (name[2] == 's' &&
10355                   name[3] == 'h' &&
10356                   name[4] == 'i' &&
10357                   name[5] == 'f' &&
10358                   name[6] == 't')
10359               {                                   /* unshift    */
10360                 return -KEY_unshift;
10361               }
10362
10363               goto unknown;
10364
10365             default:
10366               goto unknown;
10367           }
10368
10369         case 'w':
10370           if (name[1] == 'a' &&
10371               name[2] == 'i' &&
10372               name[3] == 't' &&
10373               name[4] == 'p' &&
10374               name[5] == 'i' &&
10375               name[6] == 'd')
10376           {                                       /* waitpid    */
10377             return -KEY_waitpid;
10378           }
10379
10380           goto unknown;
10381
10382         default:
10383           goto unknown;
10384       }
10385
10386     case 8: /* 26 tokens of length 8 */
10387       switch (name[0])
10388       {
10389         case 'A':
10390           if (name[1] == 'U' &&
10391               name[2] == 'T' &&
10392               name[3] == 'O' &&
10393               name[4] == 'L' &&
10394               name[5] == 'O' &&
10395               name[6] == 'A' &&
10396               name[7] == 'D')
10397           {                                       /* AUTOLOAD   */
10398             return KEY_AUTOLOAD;
10399           }
10400
10401           goto unknown;
10402
10403         case '_':
10404           if (name[1] == '_')
10405           {
10406             switch (name[2])
10407             {
10408               case 'D':
10409                 if (name[3] == 'A' &&
10410                     name[4] == 'T' &&
10411                     name[5] == 'A' &&
10412                     name[6] == '_' &&
10413                     name[7] == '_')
10414                 {                                 /* __DATA__   */
10415                   return KEY___DATA__;
10416                 }
10417
10418                 goto unknown;
10419
10420               case 'F':
10421                 if (name[3] == 'I' &&
10422                     name[4] == 'L' &&
10423                     name[5] == 'E' &&
10424                     name[6] == '_' &&
10425                     name[7] == '_')
10426                 {                                 /* __FILE__   */
10427                   return -KEY___FILE__;
10428                 }
10429
10430                 goto unknown;
10431
10432               case 'L':
10433                 if (name[3] == 'I' &&
10434                     name[4] == 'N' &&
10435                     name[5] == 'E' &&
10436                     name[6] == '_' &&
10437                     name[7] == '_')
10438                 {                                 /* __LINE__   */
10439                   return -KEY___LINE__;
10440                 }
10441
10442                 goto unknown;
10443
10444               default:
10445                 goto unknown;
10446             }
10447           }
10448
10449           goto unknown;
10450
10451         case 'c':
10452           switch (name[1])
10453           {
10454             case 'l':
10455               if (name[2] == 'o' &&
10456                   name[3] == 's' &&
10457                   name[4] == 'e' &&
10458                   name[5] == 'd' &&
10459                   name[6] == 'i' &&
10460                   name[7] == 'r')
10461               {                                   /* closedir   */
10462                 return -KEY_closedir;
10463               }
10464
10465               goto unknown;
10466
10467             case 'o':
10468               if (name[2] == 'n' &&
10469                   name[3] == 't' &&
10470                   name[4] == 'i' &&
10471                   name[5] == 'n' &&
10472                   name[6] == 'u' &&
10473                   name[7] == 'e')
10474               {                                   /* continue   */
10475                 return -KEY_continue;
10476               }
10477
10478               goto unknown;
10479
10480             default:
10481               goto unknown;
10482           }
10483
10484         case 'd':
10485           if (name[1] == 'b' &&
10486               name[2] == 'm' &&
10487               name[3] == 'c' &&
10488               name[4] == 'l' &&
10489               name[5] == 'o' &&
10490               name[6] == 's' &&
10491               name[7] == 'e')
10492           {                                       /* dbmclose   */
10493             return -KEY_dbmclose;
10494           }
10495
10496           goto unknown;
10497
10498         case 'e':
10499           if (name[1] == 'n' &&
10500               name[2] == 'd')
10501           {
10502             switch (name[3])
10503             {
10504               case 'g':
10505                 if (name[4] == 'r' &&
10506                     name[5] == 'e' &&
10507                     name[6] == 'n' &&
10508                     name[7] == 't')
10509                 {                                 /* endgrent   */
10510                   return -KEY_endgrent;
10511                 }
10512
10513                 goto unknown;
10514
10515               case 'p':
10516                 if (name[4] == 'w' &&
10517                     name[5] == 'e' &&
10518                     name[6] == 'n' &&
10519                     name[7] == 't')
10520                 {                                 /* endpwent   */
10521                   return -KEY_endpwent;
10522                 }
10523
10524                 goto unknown;
10525
10526               default:
10527                 goto unknown;
10528             }
10529           }
10530
10531           goto unknown;
10532
10533         case 'f':
10534           if (name[1] == 'o' &&
10535               name[2] == 'r' &&
10536               name[3] == 'm' &&
10537               name[4] == 'l' &&
10538               name[5] == 'i' &&
10539               name[6] == 'n' &&
10540               name[7] == 'e')
10541           {                                       /* formline   */
10542             return -KEY_formline;
10543           }
10544
10545           goto unknown;
10546
10547         case 'g':
10548           if (name[1] == 'e' &&
10549               name[2] == 't')
10550           {
10551             switch (name[3])
10552             {
10553               case 'g':
10554                 if (name[4] == 'r')
10555                 {
10556                   switch (name[5])
10557                   {
10558                     case 'e':
10559                       if (name[6] == 'n' &&
10560                           name[7] == 't')
10561                       {                           /* getgrent   */
10562                         return -KEY_getgrent;
10563                       }
10564
10565                       goto unknown;
10566
10567                     case 'g':
10568                       if (name[6] == 'i' &&
10569                           name[7] == 'd')
10570                       {                           /* getgrgid   */
10571                         return -KEY_getgrgid;
10572                       }
10573
10574                       goto unknown;
10575
10576                     case 'n':
10577                       if (name[6] == 'a' &&
10578                           name[7] == 'm')
10579                       {                           /* getgrnam   */
10580                         return -KEY_getgrnam;
10581                       }
10582
10583                       goto unknown;
10584
10585                     default:
10586                       goto unknown;
10587                   }
10588                 }
10589
10590                 goto unknown;
10591
10592               case 'l':
10593                 if (name[4] == 'o' &&
10594                     name[5] == 'g' &&
10595                     name[6] == 'i' &&
10596                     name[7] == 'n')
10597                 {                                 /* getlogin   */
10598                   return -KEY_getlogin;
10599                 }
10600
10601                 goto unknown;
10602
10603               case 'p':
10604                 if (name[4] == 'w')
10605                 {
10606                   switch (name[5])
10607                   {
10608                     case 'e':
10609                       if (name[6] == 'n' &&
10610                           name[7] == 't')
10611                       {                           /* getpwent   */
10612                         return -KEY_getpwent;
10613                       }
10614
10615                       goto unknown;
10616
10617                     case 'n':
10618                       if (name[6] == 'a' &&
10619                           name[7] == 'm')
10620                       {                           /* getpwnam   */
10621                         return -KEY_getpwnam;
10622                       }
10623
10624                       goto unknown;
10625
10626                     case 'u':
10627                       if (name[6] == 'i' &&
10628                           name[7] == 'd')
10629                       {                           /* getpwuid   */
10630                         return -KEY_getpwuid;
10631                       }
10632
10633                       goto unknown;
10634
10635                     default:
10636                       goto unknown;
10637                   }
10638                 }
10639
10640                 goto unknown;
10641
10642               default:
10643                 goto unknown;
10644             }
10645           }
10646
10647           goto unknown;
10648
10649         case 'r':
10650           if (name[1] == 'e' &&
10651               name[2] == 'a' &&
10652               name[3] == 'd')
10653           {
10654             switch (name[4])
10655             {
10656               case 'l':
10657                 if (name[5] == 'i' &&
10658                     name[6] == 'n')
10659                 {
10660                   switch (name[7])
10661                   {
10662                     case 'e':
10663                       {                           /* readline   */
10664                         return -KEY_readline;
10665                       }
10666
10667                     case 'k':
10668                       {                           /* readlink   */
10669                         return -KEY_readlink;
10670                       }
10671
10672                     default:
10673                       goto unknown;
10674                   }
10675                 }
10676
10677                 goto unknown;
10678
10679               case 'p':
10680                 if (name[5] == 'i' &&
10681                     name[6] == 'p' &&
10682                     name[7] == 'e')
10683                 {                                 /* readpipe   */
10684                   return -KEY_readpipe;
10685                 }
10686
10687                 goto unknown;
10688
10689               default:
10690                 goto unknown;
10691             }
10692           }
10693
10694           goto unknown;
10695
10696         case 's':
10697           switch (name[1])
10698           {
10699             case 'e':
10700               if (name[2] == 't')
10701               {
10702                 switch (name[3])
10703                 {
10704                   case 'g':
10705                     if (name[4] == 'r' &&
10706                         name[5] == 'e' &&
10707                         name[6] == 'n' &&
10708                         name[7] == 't')
10709                     {                             /* setgrent   */
10710                       return -KEY_setgrent;
10711                     }
10712
10713                     goto unknown;
10714
10715                   case 'p':
10716                     if (name[4] == 'w' &&
10717                         name[5] == 'e' &&
10718                         name[6] == 'n' &&
10719                         name[7] == 't')
10720                     {                             /* setpwent   */
10721                       return -KEY_setpwent;
10722                     }
10723
10724                     goto unknown;
10725
10726                   default:
10727                     goto unknown;
10728                 }
10729               }
10730
10731               goto unknown;
10732
10733             case 'h':
10734               switch (name[2])
10735               {
10736                 case 'm':
10737                   if (name[3] == 'w' &&
10738                       name[4] == 'r' &&
10739                       name[5] == 'i' &&
10740                       name[6] == 't' &&
10741                       name[7] == 'e')
10742                   {                               /* shmwrite   */
10743                     return -KEY_shmwrite;
10744                   }
10745
10746                   goto unknown;
10747
10748                 case 'u':
10749                   if (name[3] == 't' &&
10750                       name[4] == 'd' &&
10751                       name[5] == 'o' &&
10752                       name[6] == 'w' &&
10753                       name[7] == 'n')
10754                   {                               /* shutdown   */
10755                     return -KEY_shutdown;
10756                   }
10757
10758                   goto unknown;
10759
10760                 default:
10761                   goto unknown;
10762               }
10763
10764             case 'y':
10765               if (name[2] == 's' &&
10766                   name[3] == 'w' &&
10767                   name[4] == 'r' &&
10768                   name[5] == 'i' &&
10769                   name[6] == 't' &&
10770                   name[7] == 'e')
10771               {                                   /* syswrite   */
10772                 return -KEY_syswrite;
10773               }
10774
10775               goto unknown;
10776
10777             default:
10778               goto unknown;
10779           }
10780
10781         case 't':
10782           if (name[1] == 'r' &&
10783               name[2] == 'u' &&
10784               name[3] == 'n' &&
10785               name[4] == 'c' &&
10786               name[5] == 'a' &&
10787               name[6] == 't' &&
10788               name[7] == 'e')
10789           {                                       /* truncate   */
10790             return -KEY_truncate;
10791           }
10792
10793           goto unknown;
10794
10795         default:
10796           goto unknown;
10797       }
10798
10799     case 9: /* 9 tokens of length 9 */
10800       switch (name[0])
10801       {
10802         case 'U':
10803           if (name[1] == 'N' &&
10804               name[2] == 'I' &&
10805               name[3] == 'T' &&
10806               name[4] == 'C' &&
10807               name[5] == 'H' &&
10808               name[6] == 'E' &&
10809               name[7] == 'C' &&
10810               name[8] == 'K')
10811           {                                       /* UNITCHECK  */
10812             return KEY_UNITCHECK;
10813           }
10814
10815           goto unknown;
10816
10817         case 'e':
10818           if (name[1] == 'n' &&
10819               name[2] == 'd' &&
10820               name[3] == 'n' &&
10821               name[4] == 'e' &&
10822               name[5] == 't' &&
10823               name[6] == 'e' &&
10824               name[7] == 'n' &&
10825               name[8] == 't')
10826           {                                       /* endnetent  */
10827             return -KEY_endnetent;
10828           }
10829
10830           goto unknown;
10831
10832         case 'g':
10833           if (name[1] == 'e' &&
10834               name[2] == 't' &&
10835               name[3] == 'n' &&
10836               name[4] == 'e' &&
10837               name[5] == 't' &&
10838               name[6] == 'e' &&
10839               name[7] == 'n' &&
10840               name[8] == 't')
10841           {                                       /* getnetent  */
10842             return -KEY_getnetent;
10843           }
10844
10845           goto unknown;
10846
10847         case 'l':
10848           if (name[1] == 'o' &&
10849               name[2] == 'c' &&
10850               name[3] == 'a' &&
10851               name[4] == 'l' &&
10852               name[5] == 't' &&
10853               name[6] == 'i' &&
10854               name[7] == 'm' &&
10855               name[8] == 'e')
10856           {                                       /* localtime  */
10857             return -KEY_localtime;
10858           }
10859
10860           goto unknown;
10861
10862         case 'p':
10863           if (name[1] == 'r' &&
10864               name[2] == 'o' &&
10865               name[3] == 't' &&
10866               name[4] == 'o' &&
10867               name[5] == 't' &&
10868               name[6] == 'y' &&
10869               name[7] == 'p' &&
10870               name[8] == 'e')
10871           {                                       /* prototype  */
10872             return KEY_prototype;
10873           }
10874
10875           goto unknown;
10876
10877         case 'q':
10878           if (name[1] == 'u' &&
10879               name[2] == 'o' &&
10880               name[3] == 't' &&
10881               name[4] == 'e' &&
10882               name[5] == 'm' &&
10883               name[6] == 'e' &&
10884               name[7] == 't' &&
10885               name[8] == 'a')
10886           {                                       /* quotemeta  */
10887             return -KEY_quotemeta;
10888           }
10889
10890           goto unknown;
10891
10892         case 'r':
10893           if (name[1] == 'e' &&
10894               name[2] == 'w' &&
10895               name[3] == 'i' &&
10896               name[4] == 'n' &&
10897               name[5] == 'd' &&
10898               name[6] == 'd' &&
10899               name[7] == 'i' &&
10900               name[8] == 'r')
10901           {                                       /* rewinddir  */
10902             return -KEY_rewinddir;
10903           }
10904
10905           goto unknown;
10906
10907         case 's':
10908           if (name[1] == 'e' &&
10909               name[2] == 't' &&
10910               name[3] == 'n' &&
10911               name[4] == 'e' &&
10912               name[5] == 't' &&
10913               name[6] == 'e' &&
10914               name[7] == 'n' &&
10915               name[8] == 't')
10916           {                                       /* setnetent  */
10917             return -KEY_setnetent;
10918           }
10919
10920           goto unknown;
10921
10922         case 'w':
10923           if (name[1] == 'a' &&
10924               name[2] == 'n' &&
10925               name[3] == 't' &&
10926               name[4] == 'a' &&
10927               name[5] == 'r' &&
10928               name[6] == 'r' &&
10929               name[7] == 'a' &&
10930               name[8] == 'y')
10931           {                                       /* wantarray  */
10932             return -KEY_wantarray;
10933           }
10934
10935           goto unknown;
10936
10937         default:
10938           goto unknown;
10939       }
10940
10941     case 10: /* 9 tokens of length 10 */
10942       switch (name[0])
10943       {
10944         case 'e':
10945           if (name[1] == 'n' &&
10946               name[2] == 'd')
10947           {
10948             switch (name[3])
10949             {
10950               case 'h':
10951                 if (name[4] == 'o' &&
10952                     name[5] == 's' &&
10953                     name[6] == 't' &&
10954                     name[7] == 'e' &&
10955                     name[8] == 'n' &&
10956                     name[9] == 't')
10957                 {                                 /* endhostent */
10958                   return -KEY_endhostent;
10959                 }
10960
10961                 goto unknown;
10962
10963               case 's':
10964                 if (name[4] == 'e' &&
10965                     name[5] == 'r' &&
10966                     name[6] == 'v' &&
10967                     name[7] == 'e' &&
10968                     name[8] == 'n' &&
10969                     name[9] == 't')
10970                 {                                 /* endservent */
10971                   return -KEY_endservent;
10972                 }
10973
10974                 goto unknown;
10975
10976               default:
10977                 goto unknown;
10978             }
10979           }
10980
10981           goto unknown;
10982
10983         case 'g':
10984           if (name[1] == 'e' &&
10985               name[2] == 't')
10986           {
10987             switch (name[3])
10988             {
10989               case 'h':
10990                 if (name[4] == 'o' &&
10991                     name[5] == 's' &&
10992                     name[6] == 't' &&
10993                     name[7] == 'e' &&
10994                     name[8] == 'n' &&
10995                     name[9] == 't')
10996                 {                                 /* gethostent */
10997                   return -KEY_gethostent;
10998                 }
10999
11000                 goto unknown;
11001
11002               case 's':
11003                 switch (name[4])
11004                 {
11005                   case 'e':
11006                     if (name[5] == 'r' &&
11007                         name[6] == 'v' &&
11008                         name[7] == 'e' &&
11009                         name[8] == 'n' &&
11010                         name[9] == 't')
11011                     {                             /* getservent */
11012                       return -KEY_getservent;
11013                     }
11014
11015                     goto unknown;
11016
11017                   case 'o':
11018                     if (name[5] == 'c' &&
11019                         name[6] == 'k' &&
11020                         name[7] == 'o' &&
11021                         name[8] == 'p' &&
11022                         name[9] == 't')
11023                     {                             /* getsockopt */
11024                       return -KEY_getsockopt;
11025                     }
11026
11027                     goto unknown;
11028
11029                   default:
11030                     goto unknown;
11031                 }
11032
11033               default:
11034                 goto unknown;
11035             }
11036           }
11037
11038           goto unknown;
11039
11040         case 's':
11041           switch (name[1])
11042           {
11043             case 'e':
11044               if (name[2] == 't')
11045               {
11046                 switch (name[3])
11047                 {
11048                   case 'h':
11049                     if (name[4] == 'o' &&
11050                         name[5] == 's' &&
11051                         name[6] == 't' &&
11052                         name[7] == 'e' &&
11053                         name[8] == 'n' &&
11054                         name[9] == 't')
11055                     {                             /* sethostent */
11056                       return -KEY_sethostent;
11057                     }
11058
11059                     goto unknown;
11060
11061                   case 's':
11062                     switch (name[4])
11063                     {
11064                       case 'e':
11065                         if (name[5] == 'r' &&
11066                             name[6] == 'v' &&
11067                             name[7] == 'e' &&
11068                             name[8] == 'n' &&
11069                             name[9] == 't')
11070                         {                         /* setservent */
11071                           return -KEY_setservent;
11072                         }
11073
11074                         goto unknown;
11075
11076                       case 'o':
11077                         if (name[5] == 'c' &&
11078                             name[6] == 'k' &&
11079                             name[7] == 'o' &&
11080                             name[8] == 'p' &&
11081                             name[9] == 't')
11082                         {                         /* setsockopt */
11083                           return -KEY_setsockopt;
11084                         }
11085
11086                         goto unknown;
11087
11088                       default:
11089                         goto unknown;
11090                     }
11091
11092                   default:
11093                     goto unknown;
11094                 }
11095               }
11096
11097               goto unknown;
11098
11099             case 'o':
11100               if (name[2] == 'c' &&
11101                   name[3] == 'k' &&
11102                   name[4] == 'e' &&
11103                   name[5] == 't' &&
11104                   name[6] == 'p' &&
11105                   name[7] == 'a' &&
11106                   name[8] == 'i' &&
11107                   name[9] == 'r')
11108               {                                   /* socketpair */
11109                 return -KEY_socketpair;
11110               }
11111
11112               goto unknown;
11113
11114             default:
11115               goto unknown;
11116           }
11117
11118         default:
11119           goto unknown;
11120       }
11121
11122     case 11: /* 8 tokens of length 11 */
11123       switch (name[0])
11124       {
11125         case '_':
11126           if (name[1] == '_' &&
11127               name[2] == 'P' &&
11128               name[3] == 'A' &&
11129               name[4] == 'C' &&
11130               name[5] == 'K' &&
11131               name[6] == 'A' &&
11132               name[7] == 'G' &&
11133               name[8] == 'E' &&
11134               name[9] == '_' &&
11135               name[10] == '_')
11136           {                                       /* __PACKAGE__ */
11137             return -KEY___PACKAGE__;
11138           }
11139
11140           goto unknown;
11141
11142         case 'e':
11143           if (name[1] == 'n' &&
11144               name[2] == 'd' &&
11145               name[3] == 'p' &&
11146               name[4] == 'r' &&
11147               name[5] == 'o' &&
11148               name[6] == 't' &&
11149               name[7] == 'o' &&
11150               name[8] == 'e' &&
11151               name[9] == 'n' &&
11152               name[10] == 't')
11153           {                                       /* endprotoent */
11154             return -KEY_endprotoent;
11155           }
11156
11157           goto unknown;
11158
11159         case 'g':
11160           if (name[1] == 'e' &&
11161               name[2] == 't')
11162           {
11163             switch (name[3])
11164             {
11165               case 'p':
11166                 switch (name[4])
11167                 {
11168                   case 'e':
11169                     if (name[5] == 'e' &&
11170                         name[6] == 'r' &&
11171                         name[7] == 'n' &&
11172                         name[8] == 'a' &&
11173                         name[9] == 'm' &&
11174                         name[10] == 'e')
11175                     {                             /* getpeername */
11176                       return -KEY_getpeername;
11177                     }
11178
11179                     goto unknown;
11180
11181                   case 'r':
11182                     switch (name[5])
11183                     {
11184                       case 'i':
11185                         if (name[6] == 'o' &&
11186                             name[7] == 'r' &&
11187                             name[8] == 'i' &&
11188                             name[9] == 't' &&
11189                             name[10] == 'y')
11190                         {                         /* getpriority */
11191                           return -KEY_getpriority;
11192                         }
11193
11194                         goto unknown;
11195
11196                       case 'o':
11197                         if (name[6] == 't' &&
11198                             name[7] == 'o' &&
11199                             name[8] == 'e' &&
11200                             name[9] == 'n' &&
11201                             name[10] == 't')
11202                         {                         /* getprotoent */
11203                           return -KEY_getprotoent;
11204                         }
11205
11206                         goto unknown;
11207
11208                       default:
11209                         goto unknown;
11210                     }
11211
11212                   default:
11213                     goto unknown;
11214                 }
11215
11216               case 's':
11217                 if (name[4] == 'o' &&
11218                     name[5] == 'c' &&
11219                     name[6] == 'k' &&
11220                     name[7] == 'n' &&
11221                     name[8] == 'a' &&
11222                     name[9] == 'm' &&
11223                     name[10] == 'e')
11224                 {                                 /* getsockname */
11225                   return -KEY_getsockname;
11226                 }
11227
11228                 goto unknown;
11229
11230               default:
11231                 goto unknown;
11232             }
11233           }
11234
11235           goto unknown;
11236
11237         case 's':
11238           if (name[1] == 'e' &&
11239               name[2] == 't' &&
11240               name[3] == 'p' &&
11241               name[4] == 'r')
11242           {
11243             switch (name[5])
11244             {
11245               case 'i':
11246                 if (name[6] == 'o' &&
11247                     name[7] == 'r' &&
11248                     name[8] == 'i' &&
11249                     name[9] == 't' &&
11250                     name[10] == 'y')
11251                 {                                 /* setpriority */
11252                   return -KEY_setpriority;
11253                 }
11254
11255                 goto unknown;
11256
11257               case 'o':
11258                 if (name[6] == 't' &&
11259                     name[7] == 'o' &&
11260                     name[8] == 'e' &&
11261                     name[9] == 'n' &&
11262                     name[10] == 't')
11263                 {                                 /* setprotoent */
11264                   return -KEY_setprotoent;
11265                 }
11266
11267                 goto unknown;
11268
11269               default:
11270                 goto unknown;
11271             }
11272           }
11273
11274           goto unknown;
11275
11276         default:
11277           goto unknown;
11278       }
11279
11280     case 12: /* 2 tokens of length 12 */
11281       if (name[0] == 'g' &&
11282           name[1] == 'e' &&
11283           name[2] == 't' &&
11284           name[3] == 'n' &&
11285           name[4] == 'e' &&
11286           name[5] == 't' &&
11287           name[6] == 'b' &&
11288           name[7] == 'y')
11289       {
11290         switch (name[8])
11291         {
11292           case 'a':
11293             if (name[9] == 'd' &&
11294                 name[10] == 'd' &&
11295                 name[11] == 'r')
11296             {                                     /* getnetbyaddr */
11297               return -KEY_getnetbyaddr;
11298             }
11299
11300             goto unknown;
11301
11302           case 'n':
11303             if (name[9] == 'a' &&
11304                 name[10] == 'm' &&
11305                 name[11] == 'e')
11306             {                                     /* getnetbyname */
11307               return -KEY_getnetbyname;
11308             }
11309
11310             goto unknown;
11311
11312           default:
11313             goto unknown;
11314         }
11315       }
11316
11317       goto unknown;
11318
11319     case 13: /* 4 tokens of length 13 */
11320       if (name[0] == 'g' &&
11321           name[1] == 'e' &&
11322           name[2] == 't')
11323       {
11324         switch (name[3])
11325         {
11326           case 'h':
11327             if (name[4] == 'o' &&
11328                 name[5] == 's' &&
11329                 name[6] == 't' &&
11330                 name[7] == 'b' &&
11331                 name[8] == 'y')
11332             {
11333               switch (name[9])
11334               {
11335                 case 'a':
11336                   if (name[10] == 'd' &&
11337                       name[11] == 'd' &&
11338                       name[12] == 'r')
11339                   {                               /* gethostbyaddr */
11340                     return -KEY_gethostbyaddr;
11341                   }
11342
11343                   goto unknown;
11344
11345                 case 'n':
11346                   if (name[10] == 'a' &&
11347                       name[11] == 'm' &&
11348                       name[12] == 'e')
11349                   {                               /* gethostbyname */
11350                     return -KEY_gethostbyname;
11351                   }
11352
11353                   goto unknown;
11354
11355                 default:
11356                   goto unknown;
11357               }
11358             }
11359
11360             goto unknown;
11361
11362           case 's':
11363             if (name[4] == 'e' &&
11364                 name[5] == 'r' &&
11365                 name[6] == 'v' &&
11366                 name[7] == 'b' &&
11367                 name[8] == 'y')
11368             {
11369               switch (name[9])
11370               {
11371                 case 'n':
11372                   if (name[10] == 'a' &&
11373                       name[11] == 'm' &&
11374                       name[12] == 'e')
11375                   {                               /* getservbyname */
11376                     return -KEY_getservbyname;
11377                   }
11378
11379                   goto unknown;
11380
11381                 case 'p':
11382                   if (name[10] == 'o' &&
11383                       name[11] == 'r' &&
11384                       name[12] == 't')
11385                   {                               /* getservbyport */
11386                     return -KEY_getservbyport;
11387                   }
11388
11389                   goto unknown;
11390
11391                 default:
11392                   goto unknown;
11393               }
11394             }
11395
11396             goto unknown;
11397
11398           default:
11399             goto unknown;
11400         }
11401       }
11402
11403       goto unknown;
11404
11405     case 14: /* 1 tokens of length 14 */
11406       if (name[0] == 'g' &&
11407           name[1] == 'e' &&
11408           name[2] == 't' &&
11409           name[3] == 'p' &&
11410           name[4] == 'r' &&
11411           name[5] == 'o' &&
11412           name[6] == 't' &&
11413           name[7] == 'o' &&
11414           name[8] == 'b' &&
11415           name[9] == 'y' &&
11416           name[10] == 'n' &&
11417           name[11] == 'a' &&
11418           name[12] == 'm' &&
11419           name[13] == 'e')
11420       {                                           /* getprotobyname */
11421         return -KEY_getprotobyname;
11422       }
11423
11424       goto unknown;
11425
11426     case 16: /* 1 tokens of length 16 */
11427       if (name[0] == 'g' &&
11428           name[1] == 'e' &&
11429           name[2] == 't' &&
11430           name[3] == 'p' &&
11431           name[4] == 'r' &&
11432           name[5] == 'o' &&
11433           name[6] == 't' &&
11434           name[7] == 'o' &&
11435           name[8] == 'b' &&
11436           name[9] == 'y' &&
11437           name[10] == 'n' &&
11438           name[11] == 'u' &&
11439           name[12] == 'm' &&
11440           name[13] == 'b' &&
11441           name[14] == 'e' &&
11442           name[15] == 'r')
11443       {                                           /* getprotobynumber */
11444         return -KEY_getprotobynumber;
11445       }
11446
11447       goto unknown;
11448
11449     default:
11450       goto unknown;
11451   }
11452
11453 unknown:
11454   return 0;
11455 }
11456
11457 STATIC void
11458 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
11459 {
11460     dVAR;
11461
11462     PERL_ARGS_ASSERT_CHECKCOMMA;
11463
11464     if (*s == ' ' && s[1] == '(') {     /* XXX gotta be a better way */
11465         if (ckWARN(WARN_SYNTAX)) {
11466             int level = 1;
11467             const char *w;
11468             for (w = s+2; *w && level; w++) {
11469                 if (*w == '(')
11470                     ++level;
11471                 else if (*w == ')')
11472                     --level;
11473             }
11474             while (isSPACE(*w))
11475                 ++w;
11476             /* the list of chars below is for end of statements or
11477              * block / parens, boolean operators (&&, ||, //) and branch
11478              * constructs (or, and, if, until, unless, while, err, for).
11479              * Not a very solid hack... */
11480             if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
11481                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
11482                             "%s (...) interpreted as function",name);
11483         }
11484     }
11485     while (s < PL_bufend && isSPACE(*s))
11486         s++;
11487     if (*s == '(')
11488         s++;
11489     while (s < PL_bufend && isSPACE(*s))
11490         s++;
11491     if (isIDFIRST_lazy_if(s,UTF)) {
11492         const char * const w = s++;
11493         while (isALNUM_lazy_if(s,UTF))
11494             s++;
11495         while (s < PL_bufend && isSPACE(*s))
11496             s++;
11497         if (*s == ',') {
11498             GV* gv;
11499             if (keyword(w, s - w, 0))
11500                 return;
11501
11502             gv = gv_fetchpvn_flags(w, s - w, 0, SVt_PVCV);
11503             if (gv && GvCVu(gv))
11504                 return;
11505             Perl_croak(aTHX_ "No comma allowed after %s", what);
11506         }
11507     }
11508 }
11509
11510 /* Either returns sv, or mortalizes sv and returns a new SV*.
11511    Best used as sv=new_constant(..., sv, ...).
11512    If s, pv are NULL, calls subroutine with one argument,
11513    and type is used with error messages only. */
11514
11515 STATIC SV *
11516 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
11517                SV *sv, SV *pv, const char *type, STRLEN typelen)
11518 {
11519     dVAR; dSP;
11520     HV * const table = GvHV(PL_hintgv);          /* ^H */
11521     SV *res;
11522     SV **cvp;
11523     SV *cv, *typesv;
11524     const char *why1 = "", *why2 = "", *why3 = "";
11525
11526     PERL_ARGS_ASSERT_NEW_CONSTANT;
11527
11528     if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
11529         SV *msg;
11530
11531         why2 = (const char *)
11532             (strEQ(key,"charnames")
11533              ? "(possibly a missing \"use charnames ...\")"
11534              : "");
11535         msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
11536                             (type ? type: "undef"), why2);
11537
11538         /* This is convoluted and evil ("goto considered harmful")
11539          * but I do not understand the intricacies of all the different
11540          * failure modes of %^H in here.  The goal here is to make
11541          * the most probable error message user-friendly. --jhi */
11542
11543         goto msgdone;
11544
11545     report:
11546         msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
11547                             (type ? type: "undef"), why1, why2, why3);
11548     msgdone:
11549         yyerror(SvPVX_const(msg));
11550         SvREFCNT_dec(msg);
11551         return sv;
11552     }
11553
11554     /* charnames doesn't work well if there have been errors found */
11555     if (PL_error_count > 0 && strEQ(key,"charnames"))
11556         return &PL_sv_undef;
11557
11558     cvp = hv_fetch(table, key, keylen, FALSE);
11559     if (!cvp || !SvOK(*cvp)) {
11560         why1 = "$^H{";
11561         why2 = key;
11562         why3 = "} is not defined";
11563         goto report;
11564     }
11565     sv_2mortal(sv);                     /* Parent created it permanently */
11566     cv = *cvp;
11567     if (!pv && s)
11568         pv = newSVpvn_flags(s, len, SVs_TEMP);
11569     if (type && pv)
11570         typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
11571     else
11572         typesv = &PL_sv_undef;
11573
11574     PUSHSTACKi(PERLSI_OVERLOAD);
11575     ENTER ;
11576     SAVETMPS;
11577
11578     PUSHMARK(SP) ;
11579     EXTEND(sp, 3);
11580     if (pv)
11581         PUSHs(pv);
11582     PUSHs(sv);
11583     if (pv)
11584         PUSHs(typesv);
11585     PUTBACK;
11586     call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
11587
11588     SPAGAIN ;
11589
11590     /* Check the eval first */
11591     if (!PL_in_eval && SvTRUE(ERRSV)) {
11592         sv_catpvs(ERRSV, "Propagated");
11593         yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
11594         (void)POPs;
11595         res = SvREFCNT_inc_simple(sv);
11596     }
11597     else {
11598         res = POPs;
11599         SvREFCNT_inc_simple_void(res);
11600     }
11601
11602     PUTBACK ;
11603     FREETMPS ;
11604     LEAVE ;
11605     POPSTACK;
11606
11607     if (!SvOK(res)) {
11608         why1 = "Call to &{$^H{";
11609         why2 = key;
11610         why3 = "}} did not return a defined value";
11611         sv = res;
11612         goto report;
11613     }
11614
11615     return res;
11616 }
11617
11618 /* Returns a NUL terminated string, with the length of the string written to
11619    *slp
11620    */
11621 STATIC char *
11622 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
11623 {
11624     dVAR;
11625     register char *d = dest;
11626     register char * const e = d + destlen - 3;  /* two-character token, ending NUL */
11627
11628     PERL_ARGS_ASSERT_SCAN_WORD;
11629
11630     for (;;) {
11631         if (d >= e)
11632             Perl_croak(aTHX_ ident_too_long);
11633         if (isALNUM(*s))        /* UTF handled below */
11634             *d++ = *s++;
11635         else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
11636             *d++ = ':';
11637             *d++ = ':';
11638             s++;
11639         }
11640         else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
11641             *d++ = *s++;
11642             *d++ = *s++;
11643         }
11644         else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
11645             char *t = s + UTF8SKIP(s);
11646             size_t len;
11647             while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
11648                 t += UTF8SKIP(t);
11649             len = t - s;
11650             if (d + len > e)
11651                 Perl_croak(aTHX_ ident_too_long);
11652             Copy(s, d, len, char);
11653             d += len;
11654             s = t;
11655         }
11656         else {
11657             *d = '\0';
11658             *slp = d - dest;
11659             return s;
11660         }
11661     }
11662 }
11663
11664 STATIC char *
11665 S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
11666 {
11667     dVAR;
11668     char *bracket = NULL;
11669     char funny = *s++;
11670     register char *d = dest;
11671     register char * const e = d + destlen - 3;    /* two-character token, ending NUL */
11672
11673     PERL_ARGS_ASSERT_SCAN_IDENT;
11674
11675     if (isSPACE(*s))
11676         s = PEEKSPACE(s);
11677     if (isDIGIT(*s)) {
11678         while (isDIGIT(*s)) {
11679             if (d >= e)
11680                 Perl_croak(aTHX_ ident_too_long);
11681             *d++ = *s++;
11682         }
11683     }
11684     else {
11685         for (;;) {
11686             if (d >= e)
11687                 Perl_croak(aTHX_ ident_too_long);
11688             if (isALNUM(*s))    /* UTF handled below */
11689                 *d++ = *s++;
11690             else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
11691                 *d++ = ':';
11692                 *d++ = ':';
11693                 s++;
11694             }
11695             else if (*s == ':' && s[1] == ':') {
11696                 *d++ = *s++;
11697                 *d++ = *s++;
11698             }
11699             else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
11700                 char *t = s + UTF8SKIP(s);
11701                 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
11702                     t += UTF8SKIP(t);
11703                 if (d + (t - s) > e)
11704                     Perl_croak(aTHX_ ident_too_long);
11705                 Copy(s, d, t - s, char);
11706                 d += t - s;
11707                 s = t;
11708             }
11709             else
11710                 break;
11711         }
11712     }
11713     *d = '\0';
11714     d = dest;
11715     if (*d) {
11716         if (PL_lex_state != LEX_NORMAL)
11717             PL_lex_state = LEX_INTERPENDMAYBE;
11718         return s;
11719     }
11720     if (*s == '$' && s[1] &&
11721         (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
11722     {
11723         return s;
11724     }
11725     if (*s == '{') {
11726         bracket = s;
11727         s++;
11728     }
11729     else if (ck_uni)
11730         check_uni();
11731     if (s < send)
11732         *d = *s++;
11733     d[1] = '\0';
11734     if (*d == '^' && *s && isCONTROLVAR(*s)) {
11735         *d = toCTRL(*s);
11736         s++;
11737     }
11738     if (bracket) {
11739         if (isSPACE(s[-1])) {
11740             while (s < send) {
11741                 const char ch = *s++;
11742                 if (!SPACE_OR_TAB(ch)) {
11743                     *d = ch;
11744                     break;
11745                 }
11746             }
11747         }
11748         if (isIDFIRST_lazy_if(d,UTF)) {
11749             d++;
11750             if (UTF) {
11751                 char *end = s;
11752                 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
11753                     end += UTF8SKIP(end);
11754                     while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
11755                         end += UTF8SKIP(end);
11756                 }
11757                 Copy(s, d, end - s, char);
11758                 d += end - s;
11759                 s = end;
11760             }
11761             else {
11762                 while ((isALNUM(*s) || *s == ':') && d < e)
11763                     *d++ = *s++;
11764                 if (d >= e)
11765                     Perl_croak(aTHX_ ident_too_long);
11766             }
11767             *d = '\0';
11768             while (s < send && SPACE_OR_TAB(*s))
11769                 s++;
11770             if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
11771                 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
11772                     const char * const brack =
11773                         (const char *)
11774                         ((*s == '[') ? "[...]" : "{...}");
11775                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
11776                         "Ambiguous use of %c{%s%s} resolved to %c%s%s",
11777                         funny, dest, brack, funny, dest, brack);
11778                 }
11779                 bracket++;
11780                 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
11781                 return s;
11782             }
11783         }
11784         /* Handle extended ${^Foo} variables
11785          * 1999-02-27 mjd-perl-patch@plover.com */
11786         else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
11787                  && isALNUM(*s))
11788         {
11789             d++;
11790             while (isALNUM(*s) && d < e) {
11791                 *d++ = *s++;
11792             }
11793             if (d >= e)
11794                 Perl_croak(aTHX_ ident_too_long);
11795             *d = '\0';
11796         }
11797         if (*s == '}') {
11798             s++;
11799             if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
11800                 PL_lex_state = LEX_INTERPEND;
11801                 PL_expect = XREF;
11802             }
11803             if (PL_lex_state == LEX_NORMAL) {
11804                 if (ckWARN(WARN_AMBIGUOUS) &&
11805                     (keyword(dest, d - dest, 0)
11806                      || get_cvn_flags(dest, d - dest, 0)))
11807                 {
11808                     if (funny == '#')
11809                         funny = '@';
11810                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
11811                         "Ambiguous use of %c{%s} resolved to %c%s",
11812                         funny, dest, funny, dest);
11813                 }
11814             }
11815         }
11816         else {
11817             s = bracket;                /* let the parser handle it */
11818             *dest = '\0';
11819         }
11820     }
11821     else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
11822         PL_lex_state = LEX_INTERPEND;
11823     return s;
11824 }
11825
11826 static U32
11827 S_pmflag(U32 pmfl, const char ch) {
11828     switch (ch) {
11829         CASE_STD_PMMOD_FLAGS_PARSE_SET(&pmfl);
11830     case GLOBAL_PAT_MOD:      pmfl |= PMf_GLOBAL; break;
11831     case CONTINUE_PAT_MOD:    pmfl |= PMf_CONTINUE; break;
11832     case ONCE_PAT_MOD:        pmfl |= PMf_KEEP; break;
11833     case KEEPCOPY_PAT_MOD:    pmfl |= PMf_KEEPCOPY; break;
11834     case NONDESTRUCT_PAT_MOD: pmfl |= PMf_NONDESTRUCT; break;
11835     }
11836     return pmfl;
11837 }
11838
11839 STATIC char *
11840 S_scan_pat(pTHX_ char *start, I32 type)
11841 {
11842     dVAR;
11843     PMOP *pm;
11844     char *s = scan_str(start,!!PL_madskills,FALSE);
11845     const char * const valid_flags =
11846         (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
11847 #ifdef PERL_MAD
11848     char *modstart;
11849 #endif
11850
11851     PERL_ARGS_ASSERT_SCAN_PAT;
11852
11853     if (!s) {
11854         const char * const delimiter = skipspace(start);
11855         Perl_croak(aTHX_
11856                    (const char *)
11857                    (*delimiter == '?'
11858                     ? "Search pattern not terminated or ternary operator parsed as search pattern"
11859                     : "Search pattern not terminated" ));
11860     }
11861
11862     pm = (PMOP*)newPMOP(type, 0);
11863     if (PL_multi_open == '?') {
11864         /* This is the only point in the code that sets PMf_ONCE:  */
11865         pm->op_pmflags |= PMf_ONCE;
11866
11867         /* Hence it's safe to do this bit of PMOP book-keeping here, which
11868            allows us to restrict the list needed by reset to just the ??
11869            matches.  */
11870         assert(type != OP_TRANS);
11871         if (PL_curstash) {
11872             MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
11873             U32 elements;
11874             if (!mg) {
11875                 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
11876                                  0);
11877             }
11878             elements = mg->mg_len / sizeof(PMOP**);
11879             Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
11880             ((PMOP**)mg->mg_ptr) [elements++] = pm;
11881             mg->mg_len = elements * sizeof(PMOP**);
11882             PmopSTASH_set(pm,PL_curstash);
11883         }
11884     }
11885 #ifdef PERL_MAD
11886     modstart = s;
11887 #endif
11888     while (*s && strchr(valid_flags, *s))
11889         pm->op_pmflags = S_pmflag(pm->op_pmflags, *s++);
11890
11891     if (isALNUM(*s)) {
11892         Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX),
11893             "Having no space between pattern and following word is deprecated");
11894
11895     }
11896 #ifdef PERL_MAD
11897     if (PL_madskills && modstart != s) {
11898         SV* tmptoken = newSVpvn(modstart, s - modstart);
11899         append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
11900     }
11901 #endif
11902     /* issue a warning if /c is specified,but /g is not */
11903     if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
11904     {
11905         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
11906                        "Use of /c modifier is meaningless without /g" );
11907     }
11908
11909     PL_lex_op = (OP*)pm;
11910     pl_yylval.ival = OP_MATCH;
11911     return s;
11912 }
11913
11914 STATIC char *
11915 S_scan_subst(pTHX_ char *start)
11916 {
11917     dVAR;
11918     register char *s;
11919     register PMOP *pm;
11920     I32 first_start;
11921     I32 es = 0;
11922 #ifdef PERL_MAD
11923     char *modstart;
11924 #endif
11925
11926     PERL_ARGS_ASSERT_SCAN_SUBST;
11927
11928     pl_yylval.ival = OP_NULL;
11929
11930     s = scan_str(start,!!PL_madskills,FALSE);
11931
11932     if (!s)
11933         Perl_croak(aTHX_ "Substitution pattern not terminated");
11934
11935     if (s[-1] == PL_multi_open)
11936         s--;
11937 #ifdef PERL_MAD
11938     if (PL_madskills) {
11939         CURMAD('q', PL_thisopen);
11940         CURMAD('_', PL_thiswhite);
11941         CURMAD('E', PL_thisstuff);
11942         CURMAD('Q', PL_thisclose);
11943         PL_realtokenstart = s - SvPVX(PL_linestr);
11944     }
11945 #endif
11946
11947     first_start = PL_multi_start;
11948     s = scan_str(s,!!PL_madskills,FALSE);
11949     if (!s) {
11950         if (PL_lex_stuff) {
11951             SvREFCNT_dec(PL_lex_stuff);
11952             PL_lex_stuff = NULL;
11953         }
11954         Perl_croak(aTHX_ "Substitution replacement not terminated");
11955     }
11956     PL_multi_start = first_start;       /* so whole substitution is taken together */
11957
11958     pm = (PMOP*)newPMOP(OP_SUBST, 0);
11959
11960 #ifdef PERL_MAD
11961     if (PL_madskills) {
11962         CURMAD('z', PL_thisopen);
11963         CURMAD('R', PL_thisstuff);
11964         CURMAD('Z', PL_thisclose);
11965     }
11966     modstart = s;
11967 #endif
11968
11969     while (*s) {
11970         if (*s == EXEC_PAT_MOD) {
11971             s++;
11972             es++;
11973         }
11974         else if (strchr(S_PAT_MODS, *s))
11975             pm->op_pmflags = S_pmflag(pm->op_pmflags, *s++);
11976         else
11977             break;
11978     }
11979
11980 #ifdef PERL_MAD
11981     if (PL_madskills) {
11982         if (modstart != s)
11983             curmad('m', newSVpvn(modstart, s - modstart));
11984         append_madprops(PL_thismad, (OP*)pm, 0);
11985         PL_thismad = 0;
11986     }
11987 #endif
11988     if ((pm->op_pmflags & PMf_CONTINUE)) {
11989         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
11990     }
11991
11992     if (es) {
11993         SV * const repl = newSVpvs("");
11994
11995         PL_sublex_info.super_bufptr = s;
11996         PL_sublex_info.super_bufend = PL_bufend;
11997         PL_multi_end = 0;
11998         pm->op_pmflags |= PMf_EVAL;
11999         while (es-- > 0) {
12000             if (es)
12001                 sv_catpvs(repl, "eval ");
12002             else
12003                 sv_catpvs(repl, "do ");
12004         }
12005         sv_catpvs(repl, "{");
12006         sv_catsv(repl, PL_lex_repl);
12007         if (strchr(SvPVX(PL_lex_repl), '#'))
12008             sv_catpvs(repl, "\n");
12009         sv_catpvs(repl, "}");
12010         SvEVALED_on(repl);
12011         SvREFCNT_dec(PL_lex_repl);
12012         PL_lex_repl = repl;
12013     }
12014
12015     PL_lex_op = (OP*)pm;
12016     pl_yylval.ival = OP_SUBST;
12017     return s;
12018 }
12019
12020 STATIC char *
12021 S_scan_trans(pTHX_ char *start)
12022 {
12023     dVAR;
12024     register char* s;
12025     OP *o;
12026     short *tbl;
12027     U8 squash;
12028     U8 del;
12029     U8 complement;
12030 #ifdef PERL_MAD
12031     char *modstart;
12032 #endif
12033
12034     PERL_ARGS_ASSERT_SCAN_TRANS;
12035
12036     pl_yylval.ival = OP_NULL;
12037
12038     s = scan_str(start,!!PL_madskills,FALSE);
12039     if (!s)
12040         Perl_croak(aTHX_ "Transliteration pattern not terminated");
12041
12042     if (s[-1] == PL_multi_open)
12043         s--;
12044 #ifdef PERL_MAD
12045     if (PL_madskills) {
12046         CURMAD('q', PL_thisopen);
12047         CURMAD('_', PL_thiswhite);
12048         CURMAD('E', PL_thisstuff);
12049         CURMAD('Q', PL_thisclose);
12050         PL_realtokenstart = s - SvPVX(PL_linestr);
12051     }
12052 #endif
12053
12054     s = scan_str(s,!!PL_madskills,FALSE);
12055     if (!s) {
12056         if (PL_lex_stuff) {
12057             SvREFCNT_dec(PL_lex_stuff);
12058             PL_lex_stuff = NULL;
12059         }
12060         Perl_croak(aTHX_ "Transliteration replacement not terminated");
12061     }
12062     if (PL_madskills) {
12063         CURMAD('z', PL_thisopen);
12064         CURMAD('R', PL_thisstuff);
12065         CURMAD('Z', PL_thisclose);
12066     }
12067
12068     complement = del = squash = 0;
12069 #ifdef PERL_MAD
12070     modstart = s;
12071 #endif
12072     while (1) {
12073         switch (*s) {
12074         case 'c':
12075             complement = OPpTRANS_COMPLEMENT;
12076             break;
12077         case 'd':
12078             del = OPpTRANS_DELETE;
12079             break;
12080         case 's':
12081             squash = OPpTRANS_SQUASH;
12082             break;
12083         default:
12084             goto no_more;
12085         }
12086         s++;
12087     }
12088   no_more:
12089
12090     tbl = (short *)PerlMemShared_calloc(complement&&!del?258:256, sizeof(short));
12091     o = newPVOP(OP_TRANS, 0, (char*)tbl);
12092     o->op_private &= ~OPpTRANS_ALL;
12093     o->op_private |= del|squash|complement|
12094       (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
12095       (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF   : 0);
12096
12097     PL_lex_op = o;
12098     pl_yylval.ival = OP_TRANS;
12099
12100 #ifdef PERL_MAD
12101     if (PL_madskills) {
12102         if (modstart != s)
12103             curmad('m', newSVpvn(modstart, s - modstart));
12104         append_madprops(PL_thismad, o, 0);
12105         PL_thismad = 0;
12106     }
12107 #endif
12108
12109     return s;
12110 }
12111
12112 STATIC char *
12113 S_scan_heredoc(pTHX_ register char *s)
12114 {
      /* scan_heredoc
         takes: current position in the input buffer; the first two
                characters (presumably the "<<" introducer -- confirm
                against the caller) are skipped before the terminator
                specification is parsed
         returns: position at which to resume reading the input buffer
         side-effects: PL_lex_stuff receives the body of the here-document,
                pl_yylval.ival receives the op type (OP_SCALAR by default,
                OP_CONST for a '-quoted or backslashed terminator,
                OP_BACKTICK for a `-quoted one), PL_multi_start and
                PL_multi_end are set, and PL_linestr may be rewritten.

         Croaks if the terminator does not fit in PL_tokenbuf, and calls
         missingterm() if the terminator line is never found.
      */
12115     dVAR;
12116     SV *herewas;
12117     I32 op_type = OP_SCALAR;
12118     I32 len;
12119     SV *tmpstr;
12120     char term;
12121     const char *found_newline;
12122     register char *d;
12123     register char *e;
12124     char *peek;
          /* outer: there is a source file handle (PL_rsfp) and PL_lex_inwhat
             is not OP_SCALAR */
12125     const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
12126 #ifdef PERL_MAD
12127     I32 stuffstart = s - SvPVX(PL_linestr);
12128     char *tstart;
12129
12130     PL_realtokenstart = -1;
12131 #endif
12132
12133     PERL_ARGS_ASSERT_SCAN_HEREDOC;
12134
12135     s += 2;
12136     d = PL_tokenbuf;
12137     e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
          /* unless 'outer', the stored terminator starts with a newline */
12138     if (!outer)
12139         *d++ = '\n';
12140     peek = s;
12141     while (SPACE_OR_TAB(*peek))
12142         peek++;
          /* quoted terminator (blanks may precede the quote): the quote
             character is remembered in term and the text up to the matching
             quote is copied into PL_tokenbuf */
12143     if (*peek == '`' || *peek == '\'' || *peek =='"') {
12144         s = peek;
12145         term = *s++;
12146         s = delimcpy(d, e, s, PL_bufend, term, &len);
12147         d += len;
12148         if (s < PL_bufend)
12149             s++;
12150     }
          /* unquoted terminator: a leading backslash is treated like '...',
             anything else like "..."; identifier characters are copied for
             as long as they fit */
12151     else {
12152         if (*s == '\\')
12153             s++, term = '\'';
12154         else
12155             term = '"';
12156         if (!isALNUM_lazy_if(s,UTF))
12157             deprecate("bare << to mean <<\"\"");
12158         for (; isALNUM_lazy_if(s,UTF); s++) {
12159             if (d < e)
12160                 *d++ = *s;
12161         }
12162     }
12163     if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
12164         Perl_croak(aTHX_ "Delimiter for here document is too long");
          /* the stored terminator always ends in a newline; len is the
             length of everything now in PL_tokenbuf */
12165     *d++ = '\n';
12166     *d = '\0';
12167     len = d - PL_tokenbuf;
12168
12169 #ifdef PERL_MAD
12170     if (PL_madskills) {
12171         tstart = PL_tokenbuf + !outer;
12172         PL_thisclose = newSVpvn(tstart, len - !outer);
12173         tstart = SvPVX(PL_linestr) + stuffstart;
12174         PL_thisopen = newSVpvn(tstart, s - tstart);
12175         stuffstart = s - SvPVX(PL_linestr);
12176     }
12177 #endif
12178 #ifndef PERL_STRICT_CR
          /* from the first \r onwards, rewrite the rest of the buffer in
             place: \r and \r\n become \n, and PL_bufend / SvCUR shrink to
             match */
12179     d = strchr(s, '\r');
12180     if (d) {
12181         char * const olds = s;
12182         s = d;
12183         while (s < PL_bufend) {
12184             if (*s == '\r') {
12185                 *d++ = '\n';
12186                 if (*++s == '\n')
12187                     s++;
12188             }
12189             else if (*s == '\n' && s[1] == '\r') {      /* "\n\r" pair: keep the \n, drop the \r (\015\013 on a mac?) */
12190                 *d++ = *s++;
12191                 s++;
12192             }
12193             else
12194                 *d++ = *s++;
12195         }
12196         *d = '\0';
12197         PL_bufend = d;
12198         SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
12199         s = olds;
12200     }
12201 #endif
12202 #ifdef PERL_MAD
12203     found_newline = 0;
12204 #endif
          /* herewas saves the text that follows the terminator
             specification: everything up to PL_bufend when 'outer' or when
             no newline remains, otherwise only the text up to the next
             newline (starting one character before s) */
12205     if ( outer || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s)) ) {
12206         herewas = newSVpvn(s,PL_bufend-s);
12207     }
12208     else {
12209 #ifdef PERL_MAD
12210         herewas = newSVpvn(s-1,found_newline-s+1);
12211 #else
12212         s--;
12213         herewas = newSVpvn(s,found_newline-s);
12214 #endif
12215     }
12216 #ifdef PERL_MAD
12217     if (PL_madskills) {
12218         tstart = SvPVX(PL_linestr) + stuffstart;
12219         if (PL_thisstuff)
12220             sv_catpvn(PL_thisstuff, tstart, s - tstart);
12221         else
12222             PL_thisstuff = newSVpvn(tstart, s - tstart);
12223     }
12224 #endif
          /* step over the text that was just saved in herewas */
12225     s += SvCUR(herewas);
12226
12227 #ifdef PERL_MAD
12228     stuffstart = s - SvPVX(PL_linestr);
12229
12230     if (found_newline)
12231         s--;
12232 #endif
12233
          /* tmpstr accumulates the body of the here-document; its IV slot is
             set to -1 for a '-style terminator and to '\\' for a `-style
             one */
12234     tmpstr = newSV_type(SVt_PVIV);
12235     SvGROW(tmpstr, 80);
12236     if (term == '\'') {
12237         op_type = OP_CONST;
12238         SvIV_set(tmpstr, -1);
12239     }
12240     else if (term == '`') {
12241         op_type = OP_BACKTICK;
12242         SvIV_set(tmpstr, '\\');
12243     }
12244
12245     CLINE;
12246     PL_multi_start = CopLINE(PL_curcop);
12247     PL_multi_open = PL_multi_close = '<';
          /* term is reused: from here on it holds the first byte of the
             stored terminator, a cheap pre-check before the full
             memNE/memEQ comparison */
12248     term = *PL_tokenbuf;
          /* inside s/// in an eval with no file handle: search for the body
             in the enclosing buffer recorded in PL_sublex_info */
12249     if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
12250         char * const bufptr = PL_sublex_info.super_bufptr;
12251         char * const bufend = PL_sublex_info.super_bufend;
12252         char * const olds = s - SvCUR(herewas);
12253         s = strchr(bufptr, '\n');
12254         if (!s)
12255             s = bufend;
12256         d = s;
12257         while (s < bufend &&
12258           (*s != term || memNE(s,PL_tokenbuf,len)) ) {
12259             if (*s++ == '\n')
12260                 CopLINE_inc(PL_curcop);
12261         }
12262         if (s >= bufend) {
12263             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
12264             missingterm(PL_tokenbuf);
12265         }
              /* herewas is rebuilt as the enclosing text up to and including
                 d followed by the text from the end of the terminator
                 onwards, then copied back over the enclosing buffer; the
                 bytes in between become the body */
12266         sv_setpvn(herewas,bufptr,d-bufptr+1);
12267         sv_setpvn(tmpstr,d+1,s-d);
12268         s += len - 1;
12269         sv_catpvn(herewas,s,bufend-s);
12270         Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
12271
12272         s = olds;
12273         goto retval;
12274     }
          /* not 'outer': search for the terminator in the current buffer */
12275     else if (!outer) {
12276         d = s;
12277         while (s < PL_bufend &&
12278           (*s != term || memNE(s,PL_tokenbuf,len)) ) {
12279             if (*s++ == '\n')
12280                 CopLINE_inc(PL_curcop);
12281         }
12282         if (s >= PL_bufend) {
12283             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
12284             missingterm(PL_tokenbuf);
12285         }
12286         sv_setpvn(tmpstr,d+1,s-d);
12287 #ifdef PERL_MAD
12288         if (PL_madskills) {
12289             if (PL_thisstuff)
12290                 sv_catpvn(PL_thisstuff, d + 1, s - d);
12291             else
12292                 PL_thisstuff = newSVpvn(d + 1, s - d);
12293             stuffstart = s - SvPVX(PL_linestr);
12294         }
12295 #endif
12296         s += len - 1;
12297         CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
12298
              /* PL_linestr becomes herewas plus whatever follows the
                 terminator, and every buffer pointer is reset to its start */
12299         sv_catpvn(herewas,s,PL_bufend-s);
12300         sv_setsv(PL_linestr,herewas);
12301         PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
12302         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
12303         PL_last_lop = PL_last_uni = NULL;
12304     }
12305     else
12306         sv_setpvs(tmpstr,"");   /* avoid "uninitialized" warning */
12307     while (s >= PL_bufend) {    /* multiple line string? */
12308 #ifdef PERL_MAD
12309         if (PL_madskills) {
12310             tstart = SvPVX(PL_linestr) + stuffstart;
12311             if (PL_thisstuff)
12312                 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
12313             else
12314                 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
12315         }
12316 #endif
12317         PL_bufptr = s;
12318         CopLINE_inc(PL_curcop);
              /* more input is only requested when 'outer'; otherwise, or if
                 the read fails, the terminator is reported as missing */
12319         if (!outer || !lex_next_chunk(0)) {
12320             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
12321             missingterm(PL_tokenbuf);
12322         }
12323         CopLINE_dec(PL_curcop);
12324         s = PL_bufptr;
12325 #ifdef PERL_MAD
12326         stuffstart = s - SvPVX(PL_linestr);
12327 #endif
12328         CopLINE_inc(PL_curcop);
12329         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
12330         PL_last_lop = PL_last_uni = NULL;
12331 #ifndef PERL_STRICT_CR
              /* turn a trailing \r\n, \n\r or lone \r at the end of the
                 buffer into a single \n */
12332         if (PL_bufend - PL_linestart >= 2) {
12333             if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
12334                 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
12335             {
12336                 PL_bufend[-2] = '\n';
12337                 PL_bufend--;
12338                 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
12339             }
12340             else if (PL_bufend[-1] == '\r')
12341                 PL_bufend[-1] = '\n';
12342         }
12343         else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
12344             PL_bufend[-1] = '\n';
12345 #endif
              /* terminator found at s: overwrite the last byte of the buffer
                 with a space and append herewas, leaving s on that space so
                 the s++ after the loop lands on the saved text */
12346         if (*s == term && memEQ(s,PL_tokenbuf,len)) {
12347             STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
12348             *(SvPVX(PL_linestr) + off ) = ' ';
12349             sv_catsv(PL_linestr,herewas);
12350             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
12351             s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
12352         }
              /* otherwise append the buffer to the body; s = PL_bufend keeps
                 the loop going */
12353         else {
12354             s = PL_bufend;
12355             sv_catsv(tmpstr,PL_linestr);
12356         }
12357     }
12358     s++;
12359 retval:
12360     PL_multi_end = CopLINE(PL_curcop);
          /* give back spare allocation when more than a few bytes are unused */
12361     if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
12362         SvPV_shrink_to_cur(tmpstr);
12363     }
12364     SvREFCNT_dec(herewas);
          /* unless IN_BYTES: flag the body as UTF-8 when UTF is on and the
             bytes are valid UTF-8, else recode it if PL_encoding is set */
12365     if (!IN_BYTES) {
12366         if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
12367             SvUTF8_on(tmpstr);
12368         else if (PL_encoding)
12369             sv_recode_to_utf8(tmpstr, PL_encoding);
12370     }
12371     PL_lex_stuff = tmpstr;
12372     pl_yylval.ival = op_type;
12373     return s;
12374 }
12375
12376 /* scan_inputsymbol
12377    takes: current position in input buffer
12378    returns: new position in input buffer
12379    side-effects: pl_yylval and lex_op are set.
12380
12381    This code handles:
12382
12383    <>           read from ARGV
12384    <FH>         read from filehandle
12385    <pkg::FH>    read from package qualified filehandle
12386    <pkg'FH>     read from package qualified filehandle
12387    <$fh>        read from filehandle in $fh
12388    <*.h>        filename glob
12389
         The text between < and > is copied into PL_tokenbuf and must end
         before the next newline.  Croaks with "Excessively long <> operator",
         "Unterminated <> operator" or "Glob not terminated".  For a glob,
         pl_yylval.ival is OP_GLOB and the string is re-read by scan_str();
         for every readline form the op tree is built in PL_lex_op and
         pl_yylval.ival is OP_NULL.
12390 */
12391
12392 STATIC char *
12393 S_scan_inputsymbol(pTHX_ char *start)
12394 {
12395     dVAR;
12396     register char *s = start;           /* current position in buffer */
12397     char *end;
12398     I32 len;
12399     char *d = PL_tokenbuf;                                      /* start of temp holding space */
12400     const char * const e = PL_tokenbuf + sizeof PL_tokenbuf;    /* end of temp holding space */
12401
12402     PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
12403
          /* only look as far as the end of the current line */
12404     end = strchr(s, '\n');
12405     if (!end)
12406         end = PL_bufend;
12407     s = delimcpy(d, e, s + 1, end, '>', &len);  /* extract until > */
12408
12409     /* die if we didn't have space for the contents of the <>,
12410        or if it didn't end, or if we see a newline
12411     */
12412
12413     if (len >= (I32)sizeof PL_tokenbuf)
12414         Perl_croak(aTHX_ "Excessively long <> operator");
12415     if (s >= end)
12416         Perl_croak(aTHX_ "Unterminated <> operator");
12417
          /* step over the closing > */
12418     s++;
12419
12420     /* check for <$fh>
12421        Remember, only scalar variables are interpreted as filehandles by
12422        this code.  Anything more complex (e.g., <$fh{$num}>) will be
12423        treated as a glob() call.
12424        This code makes use of the fact that except for the $ at the front,
12425        a scalar variable and a filehandle look the same.
12426     */
12427     if (*d == '$' && d[1]) d++;
12428
12429     /* allow <Pkg'VALUE> or <Pkg::VALUE> */
12430     while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
12431         d++;
12432
12433     /* If we've tried to read what we allow filehandles to look like, and
12434        there's still text left, then it must be a glob() and not a getline.
12435        Use scan_str to pull out the stuff between the <> and treat it
12436        as nothing more than a string.
12437     */
12438
12439     if (d - PL_tokenbuf != len) {
12440         pl_yylval.ival = OP_GLOB;
12441         s = scan_str(start,!!PL_madskills,FALSE);
12442         if (!s)
12443            Perl_croak(aTHX_ "Glob not terminated");
12444         return s;
12445     }
12446     else {
12447         bool readline_overriden = FALSE;
12448         GV *gv_readline;
12449         GV **gvp;
12450         /* we're in a filehandle read situation */
12451         d = PL_tokenbuf;
12452
12453         /* turn <> into <ARGV> */
12454         if (!len)
12455             Copy("ARGV",d,5,char);
12456
12457         /* Check whether readline() is overridden: either the "readline"
                 GV found by gv_fetchpvs, or failing that the one in
                 PL_globalstash, must hold an imported CV */
12458         gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
12459         if ((gv_readline
12460                 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
12461                 ||
12462                 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
12463                  && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
12464                 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
12465             readline_overriden = TRUE;
12466
12467         /* if <$fh>, create the ops to turn the variable into a
12468            filehandle
12469         */
12470         if (*d == '$') {
12471             /* try to find it in the pad for this block, otherwise
12472                add symbol table ops
12473             */
12474             const PADOFFSET tmp = pad_findmy(d, len, 0);
12475             if (tmp != NOT_IN_PAD) {
                      /* an "our" variable: build the qualified name
                         stash::name and treat it as a package variable */
12476                 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
12477                     HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
12478                     HEK * const stashname = HvNAME_HEK(stash);
12479                     SV * const sym = sv_2mortal(newSVhek(stashname));
12480                     sv_catpvs(sym, "::");
12481                     sv_catpv(sym, d+1);
12482                     d = SvPVX(sym);
12483                     goto intro_sym;
12484                 }
                      /* any other pad entry: read through an OP_PADSV */
12485                 else {
12486                     OP * const o = newOP(OP_PADSV, 0);
12487                     o->op_targ = tmp;
12488                     PL_lex_op = readline_overriden
12489                         ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12490                                 append_elem(OP_LIST, o,
12491                                     newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
12492                         : (OP*)newUNOP(OP_READLINE, 0, o);
12493                 }
12494             }
                  /* not in the pad: skip the $ and look the name up as a
                     package variable */
12495             else {
12496                 GV *gv;
12497                 ++d;
12498 intro_sym:
12499                 gv = gv_fetchpv(d,
12500                                 (PL_in_eval
12501                                  ? (GV_ADDMULTI | GV_ADDINEVAL)
12502                                  : GV_ADDMULTI),
12503                                 SVt_PV);
12504                 PL_lex_op = readline_overriden
12505                     ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12506                             append_elem(OP_LIST,
12507                                 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
12508                                 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
12509                     : (OP*)newUNOP(OP_READLINE, 0,
12510                             newUNOP(OP_RV2SV, 0,
12511                                 newGVOP(OP_GV, 0, gv)));
12512             }
                  /* the built-in readline op gets OPf_SPECIAL for the <$fh>
                     form; an overriding sub call does not */
12513             if (!readline_overriden)
12514                 PL_lex_op->op_flags |= OPf_SPECIAL;
12515             /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
12516             pl_yylval.ival = OP_NULL;
12517         }
12518
12519         /* If it's none of the above, it must be a literal filehandle
12520            (<Foo::BAR> or <FOO>) so build a simple readline OP */
12521         else {
12522             GV * const gv = gv_fetchpv(d, GV_ADD, SVt_PVIO);
12523             PL_lex_op = readline_overriden
12524                 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12525                         append_elem(OP_LIST,
12526                             newGVOP(OP_GV, 0, gv),
12527                             newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
12528                 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
12529             pl_yylval.ival = OP_NULL;
12530         }
12531     }
12532
12533     return s;
12534 }
12535
12536
12537 /* scan_str
12538    takes: start position in buffer
12539           keep_quoted preserve \ on the embedded delimiter(s)
12540           keep_delims preserve the delimiters around the string
12541    returns: position to continue reading from buffer, or NULL if the
                input ran out before the closing delimiter was found
12542    side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
12543         updates the read buffer.
12544
12545    This subroutine pulls a string out of the input.  It is called for:
12546         q               single quotes           q(literal text)
12547         '               single quotes           'literal text'
12548         qq              double quotes           qq(interpolate $here please)
12549         "               double quotes           "interpolate $here please"
12550         qx              backticks               qx(/bin/ls -l)
12551         `               backticks               `/bin/ls -l`
12552         qw              quote words             @EXPORT_OK = qw( func() $spam )
12553         m//             regexp match            m/this/
12554         s///            regexp substitute       s/this/that/
12555         tr///           string transliterate    tr/this/that/
12556         y///            string transliterate    y/this/that/
12557         ($*@)           sub prototypes          sub foo ($)
12558         (stuff)         sub attr parameters     sub foo : attr(stuff)
12559         <>              readline or globs       <FOO>, <>, <$fh>, or <*.c>
12560
12561    In most of these cases (all but <>, patterns and transliterate)
12562    yylex() calls scan_str().  m// makes yylex() call scan_pat() which
12563    calls scan_str().  s/// makes yylex() call scan_subst() which calls
12564    scan_str().  tr/// and y/// make yylex() call scan_trans() which
12565    calls scan_str().
12566
12567    It skips whitespace before the string starts, and treats the first
12568    character as the delimiter.  If the delimiter is one of ([{< then
12569    the corresponding "close" character )]}> is used as the closing
12570    delimiter.  It allows quoting of delimiters, and if the string has
12571    balanced delimiters ([{<>}]) it allows nesting.
12572
12573    On success, the SV with the resulting string is put into lex_stuff or,
12574    if that is already non-NULL, into lex_repl. The second case occurs only
12575    when parsing the RHS of the special constructs s/// and tr/// (y///).
12576    For convenience, the terminating delimiter character is stuffed into
12577    SvIVX of the SV.
12578 */
12579
12580 STATIC char *
12581 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
12582 {
12583     dVAR;
12584     SV *sv;                             /* scalar value: string */
12585     const char *tmps;                   /* temp string, used for delimiter matching */
12586     register char *s = start;           /* current position in the buffer */
12587     register char term;                 /* terminating character */
12588     register char *to;                  /* current position in the sv's data */
12589     I32 brackets = 1;                   /* bracket nesting level */
12590     bool has_utf8 = FALSE;              /* is there any utf8 content? */
12591     I32 termcode;                       /* terminating char. code */
12592     U8 termstr[UTF8_MAXBYTES];          /* terminating string */
12593     STRLEN termlen;                     /* length of terminating string */
12594     int last_off = 0;                   /* last position for nesting bracket */
12595 #ifdef PERL_MAD
12596     int stuffstart;
12597     char *tstart;
12598 #endif
12599
12600     PERL_ARGS_ASSERT_SCAN_STR;
12601
12602     /* skip space before the delimiter */
12603     if (isSPACE(*s)) {
12604         s = PEEKSPACE(s);
12605     }
12606
12607 #ifdef PERL_MAD
12608     if (PL_realtokenstart >= 0) {
12609         stuffstart = PL_realtokenstart;
12610         PL_realtokenstart = -1;
12611     }
12612     else
12613         stuffstart = start - SvPVX(PL_linestr);
12614 #endif
12615     /* mark where we are, in case we need to report errors */
12616     CLINE;
12617
12618     /* after skipping whitespace, the next character is the terminator */
12619     term = *s;
12620     if (!UTF) {
12621         termcode = termstr[0] = term;
12622         termlen = 1;
12623     }
12624     else {
12625         termcode = utf8_to_uvchr((U8*)s, &termlen);
12626         Copy(s, termstr, termlen, U8);
12627         if (!UTF8_IS_INVARIANT(term))
12628             has_utf8 = TRUE;
12629     }
12630
12631     /* mark where we are */
12632     PL_multi_start = CopLINE(PL_curcop);
12633     PL_multi_open = term;
12634
12635     /* find corresponding closing delimiter: in the lookup string every
             opener sits five places before its closer, while a space or a
             closer sits five places before a copy of itself and so maps to
             itself */
12636     if (term && (tmps = strchr("([{< )]}> )]}>",term)))
12637         termcode = termstr[0] = term = tmps[5];
12638
12639     PL_multi_close = term;
12640
12641     /* create a new SV to hold the contents.  An initial buffer of 80
12642        bytes is requested via SvGROW; the figure is arbitrary. */
12643     sv = newSV_type(SVt_PVIV);
12644     SvGROW(sv, 80);
12645     SvIV_set(sv, termcode);
12646     (void)SvPOK_only(sv);               /* validate pointer */
12647
12648     /* move past delimiter and try to read a complete string */
12649     if (keep_delims)
12650         sv_catpvn(sv, s, termlen);
12651     s += termlen;
12652 #ifdef PERL_MAD
12653     tstart = SvPVX(PL_linestr) + stuffstart;
12654     if (!PL_thisopen && !keep_delims) {
12655         PL_thisopen = newSVpvn(tstart, s - tstart);
12656         stuffstart = s - SvPVX(PL_linestr);
12657     }
12658 #endif
          /* each pass consumes what is currently in the buffer; the loop is
             left by break once the closing delimiter has been found */
12659     for (;;) {
              /* PL_encoding is set and UTF is off: text is appended to sv by
                 sv_cat_decode() instead of the byte-copy loops further down
                 (NOTE(review): sv_cat_decode's exact contract is not visible
                 here; 'found' is taken to mean the terminator was reached) */
12660         if (PL_encoding && !UTF) {
12661             bool cont = TRUE;
12662
12663             while (cont) {
12664                 int offset = s - SvPVX_const(PL_linestr);
12665                 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
12666                                            &offset, (char*)termstr, termlen);
12667                 const char * const ns = SvPVX_const(PL_linestr) + offset;
12668                 char * const svlast = SvEND(sv) - 1;
12669
12670                 for (; s < ns; s++) {
12671                     if (*s == '\n' && !PL_rsfp)
12672                         CopLINE_inc(PL_curcop);
12673                 }
12674                 if (!found)
12675                     goto read_more_line;
12676                 else {
12677                     /* handle quoted delimiters: an odd number of
                             backslashes before the delimiter means it was
                             escaped, so keep scanning */
12678                     if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
12679                         const char *t;
12680                         for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
12681                             t--;
12682                         if ((svlast-1 - t) % 2) {
12683                             if (!keep_quoted) {
12684                                 *(svlast-1) = term;
12685                                 *svlast = '\0';
12686                                 SvCUR_set(sv, SvCUR(sv) - 1);
12687                             }
12688                             continue;
12689                         }
12690                     }
12691                     if (PL_multi_open == PL_multi_close) {
12692                         cont = FALSE;
12693                     }
12694                     else {
12695                         const char *t;
12696                         char *w;
12697                         for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
12698                             /* At here, all closes are "was quoted" one,
12699                                so we don't check PL_multi_close. */
12700                             if (*t == '\\') {
12701                                 if (!keep_quoted && *(t+1) == PL_multi_open)
12702                                     t++;
12703                                 else
12704                                     *w++ = *t++;
12705                             }
12706                             else if (*t == PL_multi_open)
12707                                 brackets++;
12708
12709                             *w = *t;
12710                         }
12711                         if (w < t) {
12712                             *w++ = term;
12713                             *w = '\0';
12714                             SvCUR_set(sv, w - SvPVX_const(sv));
12715                         }
12716                         last_off = w - SvPVX(sv);
12717                         if (--brackets <= 0)
12718                             cont = FALSE;
12719                     }
12720                 }
12721             }
                  /* drop the trailing delimiter from sv unless it was asked
                     for */
12722             if (!keep_delims) {
12723                 SvCUR_set(sv, SvCUR(sv) - 1);
12724                 *SvEND(sv) = '\0';
12725             }
12726             break;
12727         }
12728
12729         /* extend sv if need be */
12730         SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
12731         /* set 'to' to the next character in the sv's string */
12732         to = SvPVX(sv)+SvCUR(sv);
12733
12734         /* if the open delimiter is the same as the close delimiter there
                 is no nesting to track: copy until the terminator */
12735         if (PL_multi_open == PL_multi_close) {
12736             for (; s < PL_bufend; s++,to++) {
12737                 /* embedded newlines increment the current line number */
12738                 if (*s == '\n' && !PL_rsfp)
12739                     CopLINE_inc(PL_curcop);
12740                 /* handle quoted delimiters */
12741                 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
12742                     if (!keep_quoted && s[1] == term)
12743                         s++;
12744                 /* any other quotes are simply copied straight through */
12745                     else
12746                         *to++ = *s++;
12747                 }
12748                 /* terminate when run out of buffer (the for() condition), or
12749                    have found the terminator */
12750                 else if (*s == term) {
12751                     if (termlen == 1)
12752                         break;
12753                     if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
12754                         break;
12755                 }
12756                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
12757                     has_utf8 = TRUE;
12758                 *to = *s;
12759             }
12760         }
12761
12762         /* if the terminator isn't the same as the start character (e.g.,
12763            matched brackets), we have to allow more in the quoting, and
12764            be prepared for nested brackets.
12765         */
12766         else {
12767             /* read until we run out of string, or we find the terminator */
12768             for (; s < PL_bufend; s++,to++) {
12769                 /* embedded newlines increment the line count */
12770                 if (*s == '\n' && !PL_rsfp)
12771                     CopLINE_inc(PL_curcop);
12772                 /* backslashes can escape the open or closing characters */
12773                 if (*s == '\\' && s+1 < PL_bufend) {
12774                     if (!keep_quoted &&
12775                         ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
12776                         s++;
12777                     else
12778                         *to++ = *s++;
12779                 }
12780                 /* allow nested opens and closes */
12781                 else if (*s == PL_multi_close && --brackets <= 0)
12782                     break;
12783                 else if (*s == PL_multi_open)
12784                     brackets++;
12785                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
12786                     has_utf8 = TRUE;
12787                 *to = *s;
12788             }
12789         }
12790         /* terminate the copied string and update the sv's end-of-string */
12791         *to = '\0';
12792         SvCUR_set(sv, to - SvPVX_const(sv));
12793
12794         /*
12795          * this next chunk reads more into the buffer if we're not done yet
12796          */
12797
12798         if (s < PL_bufend)
12799             break;              /* a copy loop stopped early: the terminator was found, we are done */
12800
12801 #ifndef PERL_STRICT_CR
              /* turn a trailing \r\n, \n\r or lone \r at the end of what has
                 been copied so far into a single \n */
12802         if (to - SvPVX_const(sv) >= 2) {
12803             if ((to[-2] == '\r' && to[-1] == '\n') ||
12804                 (to[-2] == '\n' && to[-1] == '\r'))
12805             {
12806                 to[-2] = '\n';
12807                 to--;
12808                 SvCUR_set(sv, to - SvPVX_const(sv));
12809             }
12810             else if (to[-1] == '\r')
12811                 to[-1] = '\n';
12812         }
12813         else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
12814             to[-1] = '\n';
12815 #endif
12816
12817      read_more_line:
12818         /* if we're out of file, or a read fails, bail and reset the current
12819            line marker so we can report where the unterminated string began
12820         */
12821 #ifdef PERL_MAD
12822         if (PL_madskills) {
12823             char * const tstart = SvPVX(PL_linestr) + stuffstart;
12824             if (PL_thisstuff)
12825                 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
12826             else
12827                 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
12828         }
12829 #endif
12830         CopLINE_inc(PL_curcop);
12831         PL_bufptr = PL_bufend;
12832         if (!lex_next_chunk(0)) {
12833             sv_free(sv);
12834             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
12835             return NULL;
12836         }
12837         s = PL_bufptr;
12838 #ifdef PERL_MAD
12839         stuffstart = 0;
12840 #endif
12841     }
12842
12843     /* at this point, we have successfully read the delimited string */
12844
          /* on the byte-copy path s still points at the closing delimiter,
             so step over it here (the PL_encoding && !UTF path above took
             no such step) */
12845     if (!PL_encoding || UTF) {
12846 #ifdef PERL_MAD
12847         if (PL_madskills) {
12848             char * const tstart = SvPVX(PL_linestr) + stuffstart;
12849             const int len = s - tstart;
12850             if (PL_thisstuff)
12851                 sv_catpvn(PL_thisstuff, tstart, len);
12852             else
12853                 PL_thisstuff = newSVpvn(tstart, len);
12854             if (!PL_thisclose && !keep_delims)
12855                 PL_thisclose = newSVpvn(s,termlen);
12856         }
12857 #endif
12858
12859         if (keep_delims)
12860             sv_catpvn(sv, s, termlen);
12861         s += termlen;
12862     }
12863 #ifdef PERL_MAD
12864     else {
12865         if (PL_madskills) {
12866             char * const tstart = SvPVX(PL_linestr) + stuffstart;
12867             const int len = s - tstart - termlen;
12868             if (PL_thisstuff)
12869                 sv_catpvn(PL_thisstuff, tstart, len);
12870             else
12871                 PL_thisstuff = newSVpvn(tstart, len);
12872             if (!PL_thisclose && !keep_delims)
12873                 PL_thisclose = newSVpvn(s - termlen,termlen);
12874         }
12875     }
12876 #endif
12877     if (has_utf8 || PL_encoding)
12878         SvUTF8_on(sv);
12879
12880     PL_multi_end = CopLINE(PL_curcop);
12881
12882     /* if we allocated too much space, give some back */
12883     if (SvCUR(sv) + 5 < SvLEN(sv)) {
12884         SvLEN_set(sv, SvCUR(sv) + 1);
12885         SvPV_renew(sv, SvLEN(sv));
12886     }
12887
12888     /* decide whether this is the first or second quoted string we've read
12889        for this op
12890     */
12891
12892     if (PL_lex_stuff)
12893         PL_lex_repl = sv;
12894     else
12895         PL_lex_stuff = sv;
12896     return s;
12897 }
12898
12899 /*
12900   scan_num
12901   takes: pointer to position in buffer
12902   returns: pointer to new position in buffer
12903   side-effects: builds ops for the constant in pl_yylval.op
12904
12905   Read a number in any of the formats that Perl accepts:
12906
12907   \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*)      12 12.34 12.
12908   \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*)                     .34
12909   0b[01](_?[01])*
12910   0[0-7](_?[0-7])*
12911   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
12912
12913   Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
12914   thing it reads.
12915
12916   If it reads a number without a decimal point or an exponent, it will
12917   try converting the number to an integer and see if it can do so
12918   without loss of precision.
12919 */
12920
12921 char *
12922 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
12923 {
12924     dVAR;
12925     register const char *s = start;     /* current position in buffer */
12926     register char *d;                   /* destination in temp buffer */
12927     register char *e;                   /* end of temp buffer */
12928     NV nv;                              /* number read, as a double */
12929     SV *sv = NULL;                      /* place to put the converted number */
12930     bool floatit;                       /* boolean: int or float? */
12931     const char *lastub = NULL;          /* position of last underbar */
12932     static char const number_too_long[] = "Number too long";
12933
12934     PERL_ARGS_ASSERT_SCAN_NUM;
12935
12936     /* We use the first character to decide what type of number this is */
12937
12938     switch (*s) {
12939     default:
12940       Perl_croak(aTHX_ "panic: scan_num");
12941
12942     /* if it starts with a 0, it could be an octal number, a decimal in
12943        0.13 disguise, or a hexadecimal number, or a binary number. */
12944     case '0':
12945         {
12946           /* variables:
12947              u          holds the "number so far"
12948              shift      the power of 2 of the base
12949                         (hex == 4, octal == 3, binary == 1)
12950              overflowed was the number more than we can hold?
12951
12952              Shift is used when we add a digit.  It also serves as an "are
12953              we in octal/hex/binary?" indicator to disallow hex characters
12954              when in octal mode.
12955            */
12956             NV n = 0.0;
12957             UV u = 0;
12958             I32 shift;
12959             bool overflowed = FALSE;
12960             bool just_zero  = TRUE;     /* just plain 0 or binary number? */
12961             static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
12962             static const char* const bases[5] =
12963               { "", "binary", "", "octal", "hexadecimal" };
12964             static const char* const Bases[5] =
12965               { "", "Binary", "", "Octal", "Hexadecimal" };
12966             static const char* const maxima[5] =
12967               { "",
12968                 "0b11111111111111111111111111111111",
12969                 "",
12970                 "037777777777",
12971                 "0xffffffff" };
12972             const char *base, *Base, *max;
12973
12974             /* check for hex */
12975             if (s[1] == 'x') {
12976                 shift = 4;
12977                 s += 2;
12978                 just_zero = FALSE;
12979             } else if (s[1] == 'b') {
12980                 shift = 1;
12981                 s += 2;
12982                 just_zero = FALSE;
12983             }
12984             /* check for a decimal in disguise */
12985             else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
12986                 goto decimal;
12987             /* so it must be octal */
12988             else {
12989                 shift = 3;
12990                 s++;
12991             }
12992
12993             if (*s == '_') {
12994                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12995                                "Misplaced _ in number");
12996                lastub = s++;
12997             }
12998
12999             base = bases[shift];
13000             Base = Bases[shift];
13001             max  = maxima[shift];
13002
13003             /* read the rest of the number */
13004             for (;;) {
13005                 /* x is used in the overflow test,
13006                    b is the digit we're adding on. */
13007                 UV x, b;
13008
13009                 switch (*s) {
13010
13011                 /* if we don't mention it, we're done */
13012                 default:
13013                     goto out;
13014
13015                 /* _ are ignored -- but warned about if consecutive */
13016                 case '_':
13017                     if (lastub && s == lastub + 1)
13018                         Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13019                                        "Misplaced _ in number");
13020                     lastub = s++;
13021                     break;
13022
13023                 /* 8 and 9 are not octal */
13024                 case '8': case '9':
13025                     if (shift == 3)
13026                         yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
13027                     /* FALL THROUGH */
13028
13029                 /* octal digits */
13030                 case '2': case '3': case '4':
13031                 case '5': case '6': case '7':
13032                     if (shift == 1)
13033                         yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
13034                     /* FALL THROUGH */
13035
13036                 case '0': case '1':
13037                     b = *s++ & 15;              /* ASCII digit -> value of digit */
13038                     goto digit;
13039
13040                 /* hex digits */
13041                 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
13042                 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
13043                     /* make sure they said 0x */
13044                     if (shift != 4)
13045                         goto out;
13046                     b = (*s++ & 7) + 9;
13047
13048                     /* Prepare to put the digit we have onto the end
13049                        of the number so far.  We check for overflows.
13050                     */
13051
13052                   digit:
13053                     just_zero = FALSE;
13054                     if (!overflowed) {
13055                         x = u << shift; /* make room for the digit */
13056
13057                         if ((x >> shift) != u
13058                             && !(PL_hints & HINT_NEW_BINARY)) {
13059                             overflowed = TRUE;
13060                             n = (NV) u;
13061                             Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
13062                                              "Integer overflow in %s number",
13063                                              base);
13064                         } else
13065                             u = x | b;          /* add the digit to the end */
13066                     }
13067                     if (overflowed) {
13068                         n *= nvshift[shift];
13069                         /* If an NV has not enough bits in its
13070                          * mantissa to represent an UV this summing of
13071                          * small low-order numbers is a waste of time
13072                          * (because the NV cannot preserve the
13073                          * low-order bits anyway): we could just
13074                          * remember when did we overflow and in the
13075                          * end just multiply n by the right
13076                          * amount. */
13077                         n += (NV) b;
13078                     }
13079                     break;
13080                 }
13081             }
13082
13083           /* if we get here, we had success: make a scalar value from
13084              the number.
13085           */
13086           out:
13087
13088             /* final misplaced underbar check */
13089             if (s[-1] == '_') {
13090                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
13091             }
13092
13093             if (overflowed) {
13094                 if (n > 4294967295.0)
13095                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
13096                                    "%s number > %s non-portable",
13097                                    Base, max);
13098                 sv = newSVnv(n);
13099             }
13100             else {
13101 #if UVSIZE > 4
13102                 if (u > 0xffffffff)
13103                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
13104                                    "%s number > %s non-portable",
13105                                    Base, max);
13106 #endif
13107                 sv = newSVuv(u);
13108             }
13109             if (just_zero && (PL_hints & HINT_NEW_INTEGER))
13110                 sv = new_constant(start, s - start, "integer",
13111                                   sv, NULL, NULL, 0);
13112             else if (PL_hints & HINT_NEW_BINARY)
13113                 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
13114         }
13115         break;
13116
13117     /*
13118       handle decimal numbers.
13119       we're also sent here when we read a 0 as the first digit
13120     */
13121     case '1': case '2': case '3': case '4': case '5':
13122     case '6': case '7': case '8': case '9': case '.':
13123       decimal:
13124         d = PL_tokenbuf;
13125         e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
13126         floatit = FALSE;
13127
13128         /* read next group of digits and _ and copy into d */
13129         while (isDIGIT(*s) || *s == '_') {
13130             /* skip underscores, checking for misplaced ones
13131                if -w is on
13132             */
13133             if (*s == '_') {
13134                 if (lastub && s == lastub + 1)
13135                     Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13136                                    "Misplaced _ in number");
13137                 lastub = s++;
13138             }
13139             else {
13140                 /* check for end of fixed-length buffer */
13141                 if (d >= e)
13142                     Perl_croak(aTHX_ number_too_long);
13143                 /* if we're ok, copy the character */
13144                 *d++ = *s++;
13145             }
13146         }
13147
13148         /* final misplaced underbar check */
13149         if (lastub && s == lastub + 1) {
13150             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
13151         }
13152
13153         /* read a decimal portion if there is one.  avoid
13154            3..5 being interpreted as the number 3. followed
13155            by .5
13156         */
13157         if (*s == '.' && s[1] != '.') {
13158             floatit = TRUE;
13159             *d++ = *s++;
13160
13161             if (*s == '_') {
13162                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13163                                "Misplaced _ in number");
13164                 lastub = s;
13165             }
13166
13167             /* copy, ignoring underbars, until we run out of digits.
13168             */
13169             for (; isDIGIT(*s) || *s == '_'; s++) {
13170                 /* fixed length buffer check */
13171                 if (d >= e)
13172                     Perl_croak(aTHX_ number_too_long);
13173                 if (*s == '_') {
13174                    if (lastub && s == lastub + 1)
13175                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13176                                       "Misplaced _ in number");
13177                    lastub = s;
13178                 }
13179                 else
13180                     *d++ = *s;
13181             }
13182             /* fractional part ending in underbar? */
13183             if (s[-1] == '_') {
13184                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13185                                "Misplaced _ in number");
13186             }
13187             if (*s == '.' && isDIGIT(s[1])) {
13188                 /* oops, it's really a v-string, but without the "v" */
13189                 s = start;
13190                 goto vstring;
13191             }
13192         }
13193
13194         /* read exponent part, if present */
13195         if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
13196             floatit = TRUE;
13197             s++;
13198
13199             /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
13200             *d++ = 'e';         /* At least some Mach atof()s don't grok 'E' */
13201
13202             /* stray preinitial _ */
13203             if (*s == '_') {
13204                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13205                                "Misplaced _ in number");
13206                 lastub = s++;
13207             }
13208
13209             /* allow positive or negative exponent */
13210             if (*s == '+' || *s == '-')
13211                 *d++ = *s++;
13212
13213             /* stray initial _ */
13214             if (*s == '_') {
13215                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13216                                "Misplaced _ in number");
13217                 lastub = s++;
13218             }
13219
13220             /* read digits of exponent */
13221             while (isDIGIT(*s) || *s == '_') {
13222                 if (isDIGIT(*s)) {
13223                     if (d >= e)
13224                         Perl_croak(aTHX_ number_too_long);
13225                     *d++ = *s++;
13226                 }
13227                 else {
13228                    if (((lastub && s == lastub + 1) ||
13229                         (!isDIGIT(s[1]) && s[1] != '_')))
13230                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13231                                       "Misplaced _ in number");
13232                    lastub = s++;
13233                 }
13234             }
13235         }
13236
13237
13238         /*
13239            We try to do an integer conversion first if no characters
13240            indicating "float" have been found.
13241          */
13242
13243         if (!floatit) {
13244             UV uv;
13245             const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
13246
13247             if (flags == IS_NUMBER_IN_UV) {
13248               if (uv <= IV_MAX)
13249                 sv = newSViv(uv); /* Prefer IVs over UVs. */
13250               else
13251                 sv = newSVuv(uv);
13252             } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
13253               if (uv <= (UV) IV_MIN)
13254                 sv = newSViv(-(IV)uv);
13255               else
13256                 floatit = TRUE;
13257             } else
13258               floatit = TRUE;
13259         }
13260         if (floatit) {
13261             /* terminate the string */
13262             *d = '\0';
13263             nv = Atof(PL_tokenbuf);
13264             sv = newSVnv(nv);
13265         }
13266
13267         if ( floatit
13268              ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
13269             const char *const key = floatit ? "float" : "integer";
13270             const STRLEN keylen = floatit ? 5 : 7;
13271             sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
13272                                 key, keylen, sv, NULL, NULL, 0);
13273         }
13274         break;
13275
13276     /* if it starts with a v, it could be a v-string */
13277     case 'v':
13278 vstring:
13279                 sv = newSV(5); /* preallocate storage space */
13280                 s = scan_vstring(s, PL_bufend, sv);
13281         break;
13282     }
13283
13284     /* make the op for the constant and return */
13285
13286     if (sv)
13287         lvalp->opval = newSVOP(OP_CONST, 0, sv);
13288     else
13289         lvalp->opval = NULL;
13290
13291     return (char *)s;
13292 }
13293
/*
 * S_scan_formline
 *
 * Gathers text of a format body, starting at s, into the scratch SV
 * `stuff`.  Lines are accumulated until one containing '@' or '^' has been
 * taken (needargs), the terminating "." line is reached (eofmt), or input
 * runs out.  If any text was gathered, tokens for an OP_FORMLINE call on
 * that text are queued with force_next(); otherwise `stuff` is released.
 *
 * Returns the position in the buffer at which scanning stopped.
 */
13294 STATIC char *
13295 S_scan_formline(pTHX_ register char *s)
13296 {
13297     dVAR;
13298     register char *eol;
13299     register char *t;
13300     SV * const stuff = newSVpvs("");
13301     bool needargs = FALSE;
13302     bool eofmt = FALSE;
13303 #ifdef PERL_MAD
13304     char *tokenstart = s;
13305     SV* savewhite = NULL;
13306
13307     if (PL_madskills) {
13308         savewhite = PL_thiswhite;
13309         PL_thiswhite = 0;
13310     }
13311 #endif
13312
13313     PERL_ARGS_ASSERT_SCAN_FORMLINE;
13314
13315     while (!needargs) {
          /* A '.' followed only by blanks (and '\r' unless PERL_STRICT_CR)
             up to the newline or the end of the buffer ends the format. */
13316         if (*s == '.') {
13317             t = s+1;
13318 #ifdef PERL_STRICT_CR
13319             while (SPACE_OR_TAB(*t))
13320                 t++;
13321 #else
13322             while (SPACE_OR_TAB(*t) || *t == '\r')
13323                 t++;
13324 #endif
13325             if (*t == '\n' || t == PL_bufend) {
13326                 eofmt = TRUE;
13327                 break;
13328             }
13329         }
          /* Find the end of the current line.  Inside an eval with no file
             handle the buffer may hold several lines, so look for the next
             newline (eol ends up one past it, or at PL_bufend if there is
             none); otherwise the line is taken to run to the end of
             PL_linestr. */
13330         if (PL_in_eval && !PL_rsfp) {
13331             eol = (char *) memchr(s,'\n',PL_bufend-s);
13332             if (!eol++)
13333                 eol = PL_bufend;
13334         }
13335         else
13336             eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
          /* Lines beginning with '#' are skipped: nothing is appended to
             stuff for them. */
13337         if (*s != '#') {
13338             for (t = s; t < eol; t++) {
13339                 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
13340                     needargs = FALSE;
13341                     goto enough;        /* ~~ must be first line in formline */
13342                 }
13343                 if (*t == '@' || *t == '^')
13344                     needargs = TRUE;
13345             }
13346             if (eol > s) {
13347                 sv_catpvn(stuff, s, eol-s);
13348 #ifndef PERL_STRICT_CR
                  /* Collapse a trailing "\r\n" in the copied text to "\n". */
13349                 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
13350                     char *end = SvPVX(stuff) + SvCUR(stuff);
13351                     end[-2] = '\n';
13352                     end[-1] = '\0';
13353                     SvCUR_set(stuff, SvCUR(stuff) - 1);
13354                 }
13355 #endif
13356             }
13357             else
13358               break;
13359         }
13360         s = (char*)eol;
          /* Reading from a file handle: fetch the next chunk of input and
             continue from PL_bufptr; stop if nothing more could be read. */
13361         if (PL_rsfp) {
13362             bool got_some;
13363 #ifdef PERL_MAD
13364             if (PL_madskills) {
13365                 if (PL_thistoken)
13366                     sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
13367                 else
13368                     PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
13369             }
13370 #endif
13371             PL_bufptr = PL_bufend;
              /* The line counter is bumped only for the duration of the
                 read.  NOTE(review): presumably so that anything reported
                 during the read carries the next line number -- confirm. */
13372             CopLINE_inc(PL_curcop);
13373             got_some = lex_next_chunk(0);
13374             CopLINE_dec(PL_curcop);
13375             s = PL_bufptr;
13376 #ifdef PERL_MAD
13377             tokenstart = PL_bufptr;
13378 #endif
13379             if (!got_some)
13380                 break;
13381         }
13382         incline(s);
13383     }
13384   enough:
      /* Some picture text was collected: queue the tokens for it.  With
         needargs set the lexer returns to LEX_NORMAL and a ',' token is
         queued as well; otherwise it stays in LEX_FORMLINE. */
13385     if (SvCUR(stuff)) {
13386         PL_expect = XTERM;
13387         if (needargs) {
13388             PL_lex_state = LEX_NORMAL;
13389             start_force(PL_curforce);
13390             NEXTVAL_NEXTTOKE.ival = 0;
13391             force_next(',');
13392         }
13393         else
13394             PL_lex_state = LEX_FORMLINE;
          /* Outside "use bytes": flag the text as UTF-8 when the source is
             UTF-8 and the text is valid UTF-8, else recode it if a source
             encoding (PL_encoding) is set. */
13395         if (!IN_BYTES) {
13396             if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
13397                 SvUTF8_on(stuff);
13398             else if (PL_encoding)
13399                 sv_recode_to_utf8(stuff, PL_encoding);
13400         }
          /* Queue the collected text as a constant (THING), then the
             OP_FORMLINE list operator (LSTOP). */
13401         start_force(PL_curforce);
13402         NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
13403         force_next(THING);
13404         start_force(PL_curforce);
13405         NEXTVAL_NEXTTOKE.ival = OP_FORMLINE;
13406         force_next(LSTOP);
13407     }
      /* Nothing collected: release the scratch SV; if the terminating "."
         line was seen, clear PL_lex_formbrack. */
13408     else {
13409         SvREFCNT_dec(stuff);
13410         if (eofmt)
13411             PL_lex_formbrack = 0;
13412         PL_bufptr = s;
13413     }
13414 #ifdef PERL_MAD
13415     if (PL_madskills) {
13416         if (PL_thistoken)
13417             sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
13418         else
13419             PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
13420         PL_thiswhite = savewhite;
13421     }
13422 #endif
13423     return s;
13424 }
13425
13426 I32
13427 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
13428 {
13429     dVAR;
13430     const I32 oldsavestack_ix = PL_savestack_ix;
13431     CV* const outsidecv = PL_compcv;
13432
13433     if (PL_compcv) {
13434         assert(SvTYPE(PL_compcv) == SVt_PVCV);
13435     }
13436     SAVEI32(PL_subline);
13437     save_item(PL_subname);
13438     SAVESPTR(PL_compcv);
13439
13440     PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
13441     CvFLAGS(PL_compcv) |= flags;
13442
13443     PL_subline = CopLINE(PL_curcop);
13444     CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
13445     CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
13446     CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
13447
13448     return oldsavestack_ix;
13449 }
13450
13451 #ifdef __SC__
13452 #pragma segment Perl_yylex
13453 #endif
13454 static int
13455 S_yywarn(pTHX_ const char *const s)
13456 {
13457     dVAR;
13458
13459     PERL_ARGS_ASSERT_YYWARN;
13460
13461     PL_in_eval |= EVAL_WARNONLY;
13462     yyerror(s);
13463     PL_in_eval &= ~EVAL_WARNONLY;
13464     return 0;
13465 }
13466
13467 int
13468 Perl_yyerror(pTHX_ const char *const s)
13469 {
13470     dVAR;
13471     const char *where = NULL;
13472     const char *context = NULL;
13473     int contlen = -1;
13474     SV *msg;
13475     int yychar  = PL_parser->yychar;
13476
13477     PERL_ARGS_ASSERT_YYERROR;
13478
13479     if (!yychar || (yychar == ';' && !PL_rsfp))
13480         where = "at EOF";
13481     else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
13482       PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
13483       PL_oldbufptr != PL_bufptr) {
13484         /*
13485                 Only for NetWare:
13486                 The code below is removed for NetWare because it abends/crashes on NetWare
13487                 when the script has error such as not having the closing quotes like:
13488                     if ($var eq "value)
13489                 Checking of white spaces is anyway done in NetWare code.
13490         */
13491 #ifndef NETWARE
13492         while (isSPACE(*PL_oldoldbufptr))
13493             PL_oldoldbufptr++;
13494 #endif
13495         context = PL_oldoldbufptr;
13496         contlen = PL_bufptr - PL_oldoldbufptr;
13497     }
13498     else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
13499       PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
13500         /*
13501                 Only for NetWare:
13502                 The code below is removed for NetWare because it abends/crashes on NetWare
13503                 when the script has error such as not having the closing quotes like:
13504                     if ($var eq "value)
13505                 Checking of white spaces is anyway done in NetWare code.
13506         */
13507 #ifndef NETWARE
13508         while (isSPACE(*PL_oldbufptr))
13509             PL_oldbufptr++;
13510 #endif
13511         context = PL_oldbufptr;
13512         contlen = PL_bufptr - PL_oldbufptr;
13513     }
13514     else if (yychar > 255)
13515         where = "next token ???";
13516     else if (yychar == -2) { /* YYEMPTY */
13517         if (PL_lex_state == LEX_NORMAL ||
13518            (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
13519             where = "at end of line";
13520         else if (PL_lex_inpat)
13521             where = "within pattern";
13522         else
13523             where = "within string";
13524     }
13525     else {
13526         SV * const where_sv = newSVpvs_flags("next char ", SVs_TEMP);
13527         if (yychar < 32)
13528             Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
13529         else if (isPRINT_LC(yychar)) {
13530             const char string = yychar;
13531             sv_catpvn(where_sv, &string, 1);
13532         }
13533         else
13534             Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
13535         where = SvPVX_const(where_sv);
13536     }
13537     msg = sv_2mortal(newSVpv(s, 0));
13538     Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
13539         OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
13540     if (context)
13541         Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
13542     else
13543         Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
13544     if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
13545         Perl_sv_catpvf(aTHX_ msg,
13546         "  (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
13547                 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
13548         PL_multi_end = 0;
13549     }
13550     if (PL_in_eval & EVAL_WARNONLY) {
13551         Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
13552     }
13553     else
13554         qerror(msg);
13555     if (PL_error_count >= 10) {
13556         if (PL_in_eval && SvCUR(ERRSV))
13557             Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
13558                        SVfARG(ERRSV), OutCopFILE(PL_curcop));
13559         else
13560             Perl_croak(aTHX_ "%s has too many errors.\n",
13561             OutCopFILE(PL_curcop));
13562     }
13563     PL_in_my = 0;
13564     PL_in_my_stash = NULL;
13565     return 0;
13566 }
13567 #ifdef __SC__
13568 #pragma segment Main
13569 #endif
13570
13571 STATIC char*
13572 S_swallow_bom(pTHX_ U8 *s)
13573 {
13574     dVAR;
13575     const STRLEN slen = SvCUR(PL_linestr);
13576
13577     PERL_ARGS_ASSERT_SWALLOW_BOM;
13578
13579     switch (s[0]) {
13580     case 0xFF:
13581         if (s[1] == 0xFE) {
13582             /* UTF-16 little-endian? (or UTF-32LE?) */
13583             if (s[2] == 0 && s[3] == 0)  /* UTF-32 little-endian */
13584                 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
13585 #ifndef PERL_NO_UTF16_FILTER
13586             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
13587             s += 2;
13588             if (PL_bufend > (char*)s) {
13589                 s = add_utf16_textfilter(s, TRUE);
13590             }
13591 #else
13592             Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
13593 #endif
13594         }
13595         break;
13596     case 0xFE:
13597         if (s[1] == 0xFF) {   /* UTF-16 big-endian? */
13598 #ifndef PERL_NO_UTF16_FILTER
13599             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
13600             s += 2;
13601             if (PL_bufend > (char *)s) {
13602                 s = add_utf16_textfilter(s, FALSE);
13603             }
13604 #else
13605             Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
13606 #endif
13607         }
13608         break;
13609     case 0xEF:
13610         if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
13611             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
13612             s += 3;                      /* UTF-8 */
13613         }
13614         break;
13615     case 0:
13616         if (slen > 3) {
13617              if (s[1] == 0) {
13618                   if (s[2] == 0xFE && s[3] == 0xFF) {
13619                        /* UTF-32 big-endian */
13620                        Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
13621                   }
13622              }
13623              else if (s[2] == 0 && s[3] != 0) {
13624                   /* Leading bytes
13625                    * 00 xx 00 xx
13626                    * are a good indicator of UTF-16BE. */
13627 #ifndef PERL_NO_UTF16_FILTER
13628                   if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
13629                   s = add_utf16_textfilter(s, FALSE);
13630 #else
13631                   Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
13632 #endif
13633              }
13634         }
13635 #ifdef EBCDIC
13636     case 0xDD:
13637         if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
13638             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
13639             s += 4;                      /* UTF-8 */
13640         }
13641         break;
13642 #endif
13643
13644     default:
13645          if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
13646                   /* Leading bytes
13647                    * xx 00 xx 00
13648                    * are a good indicator of UTF-16LE. */
13649 #ifndef PERL_NO_UTF16_FILTER
13650               if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
13651               s = add_utf16_textfilter(s, TRUE);
13652 #else
13653               Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
13654 #endif
13655          }
13656     }
13657     return (char*)s;
13658 }
13659
13660
13661 #ifndef PERL_NO_UTF16_FILTER
/*
 * S_utf16_textfilter
 *
 * Source filter that turns UTF-16 input into UTF-8 and appends it to sv
 * one line per call.  Its state lives in the filter SV:
 *
 *   the filter SV's own string   converted UTF-8 not yet handed out
 *   IoTOP_GV(filter)             SV holding raw UTF-16 bytes not yet converted
 *   IoPAGE(filter)               status of the last read (0 == EOF, < 0 error)
 *   IoLINES(filter)              true for the byte-swapped variant (shown as
 *                                'l' in the debug output, otherwise 'b')
 *
 * Must be called in line mode (maxlen == 0); croaks otherwise, and croaks
 * if called again after a read error.
 *
 * Returns 1 if anything was appended to sv and 0 if not, or the negative
 * status of a failed read.
 */
13662 static I32
13663 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
13664 {
13665     dVAR;
13666     SV *const filter = FILTER_DATA(idx);
13667     /* We re-use this each time round, throwing the contents away before we
13668        return.  */
13669     SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
13670     SV *const utf8_buffer = filter;
13671     IV status = IoPAGE(filter);
13672     const bool reverse = cBOOL(IoLINES(filter));
13673     I32 retval;
13674
13675     PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
13676
13677     /* As we're automatically added, at the lowest level, and hence only called
13678        from this file, we can be sure that we're not called in block mode. Hence
13679        don't bother writing code to deal with block mode.  */
13680     if (maxlen) {
13681         Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
13682     }
13683     if (status < 0) {
13684         Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
13685     }
13686     DEBUG_P(PerlIO_printf(Perl_debug_log,
13687                           "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
13688                           FPTR2DPTR(void *, S_utf16_textfilter),
13689                           reverse ? 'l' : 'b', idx, maxlen, status,
13690                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
13691
      /* Each pass either hands out a line (or, at EOF, whatever is left)
         from utf8_buffer and stops, or reads and converts more UTF-16 and
         tries again. */
13692     while (1) {
13693         STRLEN chars;
13694         STRLEN have;
13695         I32 newlen;
13696         U8 *end;
13697         /* First, look in our buffer of existing UTF-8 data:  */
13698         char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
13699
13700         if (nl) {
              /* include the newline itself in what is returned */
13701             ++nl;
13702         } else if (status == 0) {
13703             /* EOF */
13704             IoPAGE(filter) = 0;
13705             nl = SvEND(utf8_buffer);
13706         }
13707         if (nl) {
13708             STRLEN got = nl - SvPVX(utf8_buffer);
13709             /* Did we have anything to append?  */
13710             retval = got != 0;
13711             sv_catpvn(sv, SvPVX(utf8_buffer), got);
13712             /* Everything else in this code works just fine if SVp_POK isn't
13713                set.  This, however, needs it, and we need it to work, else
13714                we loop infinitely because the buffer is never consumed.  */
13715             sv_chop(utf8_buffer, nl);
13716             break;
13717         }
13718
13719         /* OK, not a complete line there, so need to read some more UTF-16.
13720            Read an extra octet if the buffer currently has an odd number. */
13721         while (1) {
13722             if (status <= 0)
13723                 break;
13724             if (SvCUR(utf16_buffer) >= 2) {
13725                 /* Location of the high octet of the last complete code point.
13726                    Gosh, UTF-16 is a pain. All the benefits of variable length,
13727                    *coupled* with all the benefits of partial reads and
13728                    endianness.  */
                  /* The offset rounds the length down to a whole number of
                     pairs; the high octet is the second byte of the pair
                     when reverse is set and the first byte otherwise. */
13729                 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
13730                     + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
13731
                  /* 0xd8..0xdb is the high-octet range of a leading
                     surrogate; anything else means the buffer ends on a
                     complete character and can be converted. */
13732                 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
13733                     break;
13734                 }
13735
13736                 /* We have the first half of a surrogate. Read more.  */
13737                 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
13738             }
13739
              /* Ask the next filter down for 160 bytes, or 161 when an odd
                 byte is already pending. */
13740             status = FILTER_READ(idx + 1, utf16_buffer,
13741                                  160 + (SvCUR(utf16_buffer) & 1));
13742             DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
13743             DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
13744             if (status < 0) {
13745                 /* Error */
13746                 IoPAGE(filter) = status;
13747                 return status;
13748             }
13749         }
13750
          /* Convert every complete 16-bit unit, appending to utf8_buffer.
             Room is reserved for 3 output bytes per unit plus the
             terminating NUL written below. */
13751         chars = SvCUR(utf16_buffer) >> 1;
13752         have = SvCUR(utf8_buffer);
13753         SvGROW(utf8_buffer, have + chars * 3 + 1);
13754
13755         if (reverse) {
13756             end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
13757                                          (U8*)SvPVX_const(utf8_buffer) + have,
13758                                          chars * 2, &newlen);
13759         } else {
13760             end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
13761                                 (U8*)SvPVX_const(utf8_buffer) + have,
13762                                 chars * 2, &newlen);
13763         }
13764         SvCUR_set(utf8_buffer, have + newlen);
13765         *end = '\0';
13766
13767         /* No need to keep this SV "well-formed" with a '\0' after the end, as
13768            it's private to us, and utf16_to_utf8{,reversed} take a
13769            (pointer,length) pair, rather than a NUL-terminated string.  */
          /* Keep an unconverted odd trailing byte by moving it to the front
             of the buffer; otherwise the buffer is emptied. */
13770         if(SvCUR(utf16_buffer) & 1) {
13771             *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
13772             SvCUR_set(utf16_buffer, 1);
13773         } else {
13774             SvCUR_set(utf16_buffer, 0);
13775         }
13776     }
13777     DEBUG_P(PerlIO_printf(Perl_debug_log,
13778                           "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
13779                           status,
13780                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
13781     DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
13782     return retval;
13783 }
13784
13785 static U8 *
13786 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
13787 {
13788     SV *filter = filter_add(S_utf16_textfilter, NULL);
13789
13790     PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
13791
13792     IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
13793     sv_setpvs(filter, "");
13794     IoLINES(filter) = reversed;
13795     IoPAGE(filter) = 1; /* Not EOF */
13796
13797     /* Sadly, we have to return a valid pointer, come what may, so we have to
13798        ignore any error return from this.  */
13799     SvCUR_set(PL_linestr, 0);
13800     if (FILTER_READ(0, PL_linestr, 0)) {
13801         SvUTF8_on(PL_linestr);
13802     } else {
13803         SvUTF8_on(PL_linestr);
13804     }
13805     PL_bufend = SvEND(PL_linestr);
13806     return (U8*)SvPVX(PL_linestr);
13807 }
13808 #endif
13809
13810 /*
13811 Returns a pointer to the next character after the parsed
13812 vstring, as well as updating the passed in sv.
13813
13814 Function must be called like
13815
13816         sv = newSV(5);
13817         s = scan_vstring(s,e,sv);
13818
13819 where s and e are the start and end of the string.
13820 The sv should already be large enough to store the vstring
13821 passed in, for performance reasons.
13822
13823 */
13824
13825 char *
13826 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
13827 {
13828     dVAR;
13829     const char *pos = s;
13830     const char *start = s;
13831
13832     PERL_ARGS_ASSERT_SCAN_VSTRING;
13833
13834     if (*pos == 'v') pos++;  /* get past 'v' */
13835     while (pos < e && (isDIGIT(*pos) || *pos == '_'))
13836         pos++;
13837     if ( *pos != '.') {
13838         /* this may not be a v-string if followed by => */
13839         const char *next = pos;
13840         while (next < e && isSPACE(*next))
13841             ++next;
13842         if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
13843             /* return string not v-string */
13844             sv_setpvn(sv,(char *)s,pos-s);
13845             return (char *)pos;
13846         }
13847     }
13848
13849     if (!isALPHA(*pos)) {
13850         U8 tmpbuf[UTF8_MAXBYTES+1];
13851
13852         if (*s == 'v')
13853             s++;  /* get past 'v' */
13854
13855         sv_setpvs(sv, "");
13856
13857         for (;;) {
13858             /* this is atoi() that tolerates underscores */
13859             U8 *tmpend;
13860             UV rev = 0;
13861             const char *end = pos;
13862             UV mult = 1;
13863             while (--end >= s) {
13864                 if (*end != '_') {
13865                     const UV orev = rev;
13866                     rev += (*end - '0') * mult;
13867                     mult *= 10;
13868                     if (orev > rev)
13869                         Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
13870                                          "Integer overflow in decimal number");
13871                 }
13872             }
13873 #ifdef EBCDIC
13874             if (rev > 0x7FFFFFFF)
13875                  Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
13876 #endif
13877             /* Append native character for the rev point */
13878             tmpend = uvchr_to_utf8(tmpbuf, rev);
13879             sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
13880             if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
13881                  SvUTF8_on(sv);
13882             if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
13883                  s = ++pos;
13884             else {
13885                  s = pos;
13886                  break;
13887             }
13888             while (pos < e && (isDIGIT(*pos) || *pos == '_'))
13889                  pos++;
13890         }
13891         SvPOK_on(sv);
13892         sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
13893         SvRMAGICAL_on(sv);
13894     }
13895     return (char *)s;
13896 }
13897
13898 int
13899 Perl_keyword_plugin_standard(pTHX_
13900         char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
13901 {
13902     PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
13903     PERL_UNUSED_CONTEXT;
13904     PERL_UNUSED_ARG(keyword_ptr);
13905     PERL_UNUSED_ARG(keyword_len);
13906     PERL_UNUSED_ARG(op_ptr);
13907     return KEYWORD_PLUGIN_DECLINE;
13908 }
13909
13910 /*
13911  * Local variables:
13912  * c-indentation-style: bsd
13913  * c-basic-offset: 4
13914  * indent-tabs-mode: t
13915  * End:
13916  *
13917  * ex: set ts=8 sts=4 sw=4 noet:
13918  */