/* toke.c
*
- * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
- * 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, by Larry Wall and others
+ * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
+ * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
*
* You may distribute under the terms of either the GNU General Public
* License or the Artistic License, as specified in the README file.
*/
/*
- * "It all comes from here, the stench and the peril." --Frodo
+ * 'It all comes from here, the stench and the peril.' --Frodo
+ *
+ * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
*/
/*
* The main routine is yylex(), which returns the next token.
*/
+/*
+=head1 Lexer interface
+
+This is the lower layer of the Perl parser, managing characters and tokens.
+
+=for apidoc AmU|yy_parser *|PL_parser
+
+Pointer to a structure encapsulating the state of the parsing operation
+currently in progress. The pointer can be locally changed to perform
+a nested parse without interfering with the state of an outer parse.
+Individual members of C<PL_parser> have their own documentation.
+
+=cut
+*/
+
#include "EXTERN.h"
#define PERL_IN_TOKE_C
#include "perl.h"
-#define yylval (PL_parser->yylval)
+#define new_constant(a,b,c,d,e,f,g) \
+ S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
+
+#define pl_yylval (PL_parser->yylval)
/* YYINITDEPTH -- initial size of the parser's stacks. */
#define YYINITDEPTH 200
#define PL_pending_ident (PL_parser->pending_ident)
#define PL_preambled (PL_parser->preambled)
#define PL_sublex_info (PL_parser->sublex_info)
+#define PL_linestr (PL_parser->linestr)
+#define PL_expect (PL_parser->expect)
+#define PL_copline (PL_parser->copline)
+#define PL_bufptr (PL_parser->bufptr)
+#define PL_oldbufptr (PL_parser->oldbufptr)
+#define PL_oldoldbufptr (PL_parser->oldoldbufptr)
+#define PL_linestart (PL_parser->linestart)
+#define PL_bufend (PL_parser->bufend)
+#define PL_last_uni (PL_parser->last_uni)
+#define PL_last_lop (PL_parser->last_lop)
+#define PL_last_lop_op (PL_parser->last_lop_op)
+#define PL_lex_state (PL_parser->lex_state)
+#define PL_rsfp (PL_parser->rsfp)
+#define PL_rsfp_filters (PL_parser->rsfp_filters)
+#define PL_in_my (PL_parser->in_my)
+#define PL_in_my_stash (PL_parser->in_my_stash)
+#define PL_tokenbuf (PL_parser->tokenbuf)
+#define PL_multi_end (PL_parser->multi_end)
+#define PL_error_count (PL_parser->error_count)
#ifdef PERL_MAD
# define PL_endwhite (PL_parser->endwhite)
# define PL_thisstuff (PL_parser->thisstuff)
# define PL_thistoken (PL_parser->thistoken)
# define PL_thiswhite (PL_parser->thiswhite)
+# define PL_thiswhite (PL_parser->thiswhite)
+# define PL_nexttoke (PL_parser->nexttoke)
+# define PL_curforce (PL_parser->curforce)
+#else
+# define PL_nexttoke (PL_parser->nexttoke)
+# define PL_nexttype (PL_parser->nexttype)
+# define PL_nextval (PL_parser->nextval)
#endif
+/* This can't be done with embed.fnc, because struct yy_parser contains a
+ member named pending_ident, which clashes with the generated #define */
static int
S_pending_ident(pTHX);
static const char ident_too_long[] = "Identifier too long";
-static const char commaless_variable_list[] = "comma-less variable list";
-
-static void restore_rsfp(pTHX_ void *f);
-#ifndef PERL_NO_UTF16_FILTER
-static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
-static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
-#endif
#ifdef PERL_MAD
# define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
#endif
+/* The maximum number of characters preceding the unrecognized one to display */
+#define UNRECOGNIZED_PRECEDE_COUNT 10
+
/* In variables named $^X, these are the legal values for X.
* 1999-02-27 mjd-perl-patch@plover.com */
#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
-/* On MacOS, respect nonbreaking spaces */
-#ifdef MACOS_TRADITIONAL
-#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
-#else
#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
-#endif
/* LEX_* are values for PL_lex_state, the state of the lexer.
* They are arranged oddly so that the guard on the switch statement
*/
#ifdef DEBUGGING /* Serve -DT. */
-# define REPORT(retval) tokereport((I32)retval)
+# define REPORT(retval) tokereport((I32)retval, &pl_yylval)
#else
# define REPORT(retval) (retval)
#endif
#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
-#define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
-#define FTST(f) return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
-#define FUN0(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
-#define FUN1(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
-#define BOop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
-#define BAop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
-#define SHop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
-#define PWop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
-#define PMop(f) return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
-#define Aop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
-#define Mop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
-#define Eop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
-#define Rop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
+#define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
+#define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
+#define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
+#define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
+#define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
+#define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
+#define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
+#define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
+#define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
+#define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
+#define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
+#define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
+#define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
/* This bit of chicanery makes a unary function followed by
* a parenthesis into a function with one argument, highest precedence.
* operator (such as C<shift // 0>).
*/
#define UNI2(f,x) { \
- yylval.ival = f; \
+ pl_yylval.ival = f; \
PL_expect = x; \
PL_bufptr = s; \
PL_last_uni = PL_oldbufptr; \
#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
#define UNIBRACK(f) { \
- yylval.ival = f; \
+ pl_yylval.ival = f; \
PL_bufptr = s; \
PL_last_uni = PL_oldbufptr; \
if (*s == '(') \
}
/* grandfather return to old style */
-#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
+#define OLDLOP(f) return(pl_yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
#ifdef DEBUGGING
-/* how to interpret the yylval associated with the token */
+/* how to interpret the pl_yylval associated with the token */
enum token_type {
TOKENTYPE_NONE,
TOKENTYPE_IVAL,
- TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
+ TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
TOKENTYPE_PVAL,
TOKENTYPE_OPVAL,
TOKENTYPE_GVVAL
{ OROP, TOKENTYPE_IVAL, "OROP" },
{ OROR, TOKENTYPE_NONE, "OROR" },
{ PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
+ { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" },
+ { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" },
{ PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
{ POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
{ POSTINC, TOKENTYPE_NONE, "POSTINC" },
{ WHEN, TOKENTYPE_IVAL, "WHEN" },
{ WHILE, TOKENTYPE_IVAL, "WHILE" },
{ WORD, TOKENTYPE_OPVAL, "WORD" },
+ { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
{ 0, TOKENTYPE_NONE, NULL }
};
-/* dump the returned token in rv, plus any optional arg in yylval */
+/* dump the returned token in rv, plus any optional arg in pl_yylval */
STATIC int
-S_tokereport(pTHX_ I32 rv)
+S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
{
dVAR;
+
+ PERL_ARGS_ASSERT_TOKEREPORT;
+
if (DEBUG_T_TEST) {
const char *name = NULL;
enum token_type type = TOKENTYPE_NONE;
case TOKENTYPE_GVVAL: /* doesn't appear to be used */
break;
case TOKENTYPE_IVAL:
- Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)yylval.ival);
+ Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
break;
case TOKENTYPE_OPNUM:
Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
- PL_op_name[yylval.ival]);
+ PL_op_name[lvalp->ival]);
break;
case TOKENTYPE_PVAL:
- Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
+ Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
break;
case TOKENTYPE_OPVAL:
- if (yylval.opval) {
+ if (lvalp->opval) {
Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
- PL_op_name[yylval.opval->op_type]);
- if (yylval.opval->op_type == OP_CONST) {
+ PL_op_name[lvalp->opval->op_type]);
+ if (lvalp->opval->op_type == OP_CONST) {
Perl_sv_catpvf(aTHX_ report, " %s",
- SvPEEK(cSVOPx_sv(yylval.opval)));
+ SvPEEK(cSVOPx_sv(lvalp->opval)));
}
}
/* print the buffer with suitable escapes */
STATIC void
-S_printbuf(pTHX_ const char* fmt, const char* s)
+S_printbuf(pTHX_ const char *const fmt, const char *const s)
{
SV* const tmp = newSVpvs("");
+
+ PERL_ARGS_ASSERT_PRINTBUF;
+
PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
SvREFCNT_dec(tmp);
}
#endif
+static int
+S_deprecate_commaless_var_list(pTHX) {
+ PL_expect = XTERM;
+ deprecate("comma-less variable list");
+ return REPORT(','); /* grandfather non-comma-format format */
+}
+
/*
* S_ao
*
if (*PL_bufptr == '=') {
PL_bufptr++;
if (toketype == ANDAND)
- yylval.ival = OP_ANDASSIGN;
+ pl_yylval.ival = OP_ANDASSIGN;
else if (toketype == OROR)
- yylval.ival = OP_ORASSIGN;
+ pl_yylval.ival = OP_ORASSIGN;
else if (toketype == DORDOR)
- yylval.ival = OP_DORASSIGN;
+ pl_yylval.ival = OP_DORASSIGN;
toketype = ASSIGNOP;
}
return toketype;
*/
STATIC void
-S_no_op(pTHX_ const char *what, char *s)
+S_no_op(pTHX_ const char *const what, char *s)
{
dVAR;
char * const oldbp = PL_bufptr;
const bool is_first = (PL_oldbufptr == PL_linestart);
+ PERL_ARGS_ASSERT_NO_OP;
+
if (!s)
s = oldbp;
else
if (nl)
*nl = '\0';
}
- else if (
-#ifdef EBCDIC
- iscntrl(PL_multi_close)
-#else
- PL_multi_close < 32 || PL_multi_close == 127
-#endif
- ) {
+ else if (isCNTRL(PL_multi_close)) {
*tmpbuf = '^';
tmpbuf[1] = (char)toCTRL(PL_multi_close);
tmpbuf[2] = '\0';
#define FEATURE_IS_ENABLED(name) \
((0 != (PL_hints & HINT_LOCALIZE_HH)) \
&& S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
+/* The longest string we pass in. */
+#define MAX_FEATURE_LEN (sizeof("unicode_strings")-1)
+
/*
* S_feature_is_enabled
* Check whether the named feature is enabled.
*/
STATIC bool
-S_feature_is_enabled(pTHX_ const char *name, STRLEN namelen)
+S_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
{
dVAR;
HV * const hinthv = GvHV(PL_hintgv);
- char he_name[32] = "feature_";
- (void) my_strlcpy(&he_name[8], name, 24);
+ char he_name[8 + MAX_FEATURE_LEN] = "feature_";
- return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
-}
-
-/*
- * Perl_deprecate
- */
+ PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
-void
-Perl_deprecate(pTHX_ const char *s)
-{
- if (ckWARN(WARN_DEPRECATED))
- Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
-}
+ assert(namelen <= MAX_FEATURE_LEN);
+ memcpy(&he_name[8], name, namelen);
-void
-Perl_deprecate_old(pTHX_ const char *s)
-{
- /* This function should NOT be called for any new deprecated warnings */
- /* Use Perl_deprecate instead */
- /* */
- /* It is here to maintain backward compatibility with the pre-5.8 */
- /* warnings category hierarchy. The "deprecated" category used to */
- /* live under the "syntax" category. It is now a top-level category */
- /* in its own right. */
-
- if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
- Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
- "Use of %s is deprecated", s);
+ return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
}
/*
{
register const char *s = SvPVX_const(sv);
register const char * const e = s + SvCUR(sv);
+
+ PERL_ARGS_ASSERT_STRIP_RETURN;
+
/* outer loop optimized to do nothing if there are no CR-LFs */
while (s < e) {
if (*s++ == '\r' && *s == '\n') {
/*
* Perl_lex_start
- * Initialize variables. Uses the Perl save_stack to save its state (for
- * recursive calls to the parser).
+ *
+ * Create a parser object and initialise its parser and lexer fields
+ *
+ * rsfp is the opened file handle to read from (if any),
+ *
+ * line holds any initial content already read from the file (or in
+ * the case of no file, such as an eval, the whole contents);
+ *
+ * new_filter indicates that this is a new file and it shouldn't inherit
+ * the filters from the current parser (ie require).
*/
void
-Perl_lex_start(pTHX_ SV *line)
+Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, bool new_filter)
{
dVAR;
const char *s = NULL;
STRLEN len;
- yy_parser *parser;
+ yy_parser *parser, *oparser;
/* create and initialise a parser */
Newxz(parser, 1, yy_parser);
- parser->old_parser = PL_parser;
+ parser->old_parser = oparser = PL_parser;
PL_parser = parser;
Newx(parser->stack, YYINITDEPTH, yy_stack_frame);
parser->yyerrstatus = 0;
parser->yychar = YYEMPTY; /* Cause a token to be read. */
+ /* on scope exit, free this parser and restore any outer one */
+ SAVEPARSER(parser);
+ parser->saved_curcop = PL_curcop;
+
/* initialise lexer state */
- SAVEI32(PL_lex_state);
#ifdef PERL_MAD
- if (PL_lex_state == LEX_KNOWNEXT) {
- I32 toke = parser->old_parser->lasttoke;
- while (--toke >= 0) {
- SAVEI32(PL_nexttoke[toke].next_type);
- SAVEVPTR(PL_nexttoke[toke].next_val);
- if (PL_madskills)
- SAVEVPTR(PL_nexttoke[toke].next_mad);
- }
- }
- SAVEI32(PL_curforce);
+ parser->curforce = -1;
#else
- if (PL_lex_state == LEX_KNOWNEXT) {
- I32 toke = PL_nexttoke;
- while (--toke >= 0) {
- SAVEI32(PL_nexttype[toke]);
- SAVEVPTR(PL_nextval[toke]);
- }
- SAVEI32(PL_nexttoke);
- }
+ parser->nexttoke = 0;
#endif
- SAVECOPLINE(PL_curcop);
- SAVEPPTR(PL_bufptr);
- SAVEPPTR(PL_bufend);
- SAVEPPTR(PL_oldbufptr);
- SAVEPPTR(PL_oldoldbufptr);
- SAVEPPTR(PL_last_lop);
- SAVEPPTR(PL_last_uni);
- SAVEPPTR(PL_linestart);
- SAVESPTR(PL_linestr);
- SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
- SAVEINT(PL_expect);
+ parser->error_count = oparser ? oparser->error_count : 0;
+ parser->copline = NOLINE;
+ parser->lex_state = LEX_NORMAL;
+ parser->expect = XSTATE;
+ parser->rsfp = rsfp;
+ parser->rsfp_filters = (new_filter || !oparser) ? newAV()
+ : MUTABLE_AV(SvREFCNT_inc(oparser->rsfp_filters));
- PL_copline = NOLINE;
- PL_lex_state = LEX_NORMAL;
- PL_expect = XSTATE;
Newx(parser->lex_brackstack, 120, char);
Newx(parser->lex_casestack, 12, char);
*parser->lex_casestack = '\0';
-#ifndef PERL_MAD
- PL_nexttoke = 0;
-#endif
if (line) {
s = SvPV_const(line, len);
} else {
len = 0;
}
+
if (!len) {
- PL_linestr = newSVpvs("\n;");
+ parser->linestr = newSVpvs("\n;");
} else if (SvREADONLY(line) || s[len-1] != ';') {
- PL_linestr = newSVsv(line);
+ parser->linestr = newSVsv(line);
if (s[len-1] != ';')
- sv_catpvs(PL_linestr, "\n;");
+ sv_catpvs(parser->linestr, "\n;");
} else {
SvTEMP_off(line);
SvREFCNT_inc_simple_void_NN(line);
- PL_linestr = line;
- }
- /* PL_linestr needs to survive until end of scope, not just the next
- FREETMPS. See changes 17505 and 17546 which fixed the symptoms only. */
- SAVEFREESV(PL_linestr);
- PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
- PL_bufend = PL_bufptr + SvCUR(PL_linestr);
- PL_last_lop = PL_last_uni = NULL;
- PL_rsfp = 0;
+ parser->linestr = line;
+ }
+ parser->oldoldbufptr =
+ parser->oldbufptr =
+ parser->bufptr =
+ parser->linestart = SvPVX(parser->linestr);
+ parser->bufend = parser->bufptr + SvCUR(parser->linestr);
+ parser->last_lop = parser->last_uni = NULL;
}
+
+/* delete a parser object */
+
+void
+Perl_parser_free(pTHX_ const yy_parser *parser)
+{
+ PERL_ARGS_ASSERT_PARSER_FREE;
+
+ PL_curcop = parser->saved_curcop;
+ SvREFCNT_dec(parser->linestr);
+
+ if (parser->rsfp == PerlIO_stdin())
+ PerlIO_clearerr(parser->rsfp);
+ else if (parser->rsfp && (!parser->old_parser ||
+ (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
+ PerlIO_close(parser->rsfp);
+ SvREFCNT_dec(parser->rsfp_filters);
+
+ Safefree(parser->stack);
+ Safefree(parser->lex_brackstack);
+ Safefree(parser->lex_casestack);
+ PL_parser = parser->old_parser;
+ Safefree(parser);
+}
+
+
/*
* Perl_lex_end
* Finalizer for lexing operations. Must be called when the parser is
}
/*
+=for apidoc AmxU|SV *|PL_parser-E<gt>linestr
+
+Buffer scalar containing the chunk currently under consideration of the
+text currently being lexed. This is always a plain string scalar (for
+which C<SvPOK> is true). It is not intended to be used as a scalar by
+normal scalar means; instead refer to the buffer directly by the pointer
+variables described below.
+
+The lexer maintains various C<char*> pointers to things in the
+C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever
+reallocated, all of these pointers must be updated. Don't attempt to
+do this manually, but rather use L</lex_grow_linestr> if you need to
+reallocate the buffer.
+
+The content of the text chunk in the buffer is commonly exactly one
+complete line of input, up to and including a newline terminator,
+but there are situations where it is otherwise. The octets of the
+buffer may be intended to be interpreted as either UTF-8 or Latin-1.
+The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8>
+flag on this scalar, which may disagree with it.
+
+For direct examination of the buffer, the variable
+L</PL_parser-E<gt>bufend> points to the end of the buffer. The current
+lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use
+of these pointers is usually preferable to examination of the scalar
+through normal scalar means.
+
+=for apidoc AmxU|char *|PL_parser-E<gt>bufend
+
+Direct pointer to the end of the chunk of text currently being lexed, the
+end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr)
++ SvCUR(PL_parser-E<gt>linestr)>. A NUL character (zero octet) is
+always located at the end of the buffer, and does not count as part of
+the buffer's contents.
+
+=for apidoc AmxU|char *|PL_parser-E<gt>bufptr
+
+Points to the current position of lexing inside the lexer buffer.
+Characters around this point may be freely examined, within
+the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
+L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be
+interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
+
+Lexing code (whether in the Perl core or not) moves this pointer past
+the characters that it consumes. It is also expected to perform some
+bookkeeping whenever a newline character is consumed. This movement
+can be more conveniently performed by the function L</lex_read_to>,
+which handles newlines appropriately.
+
+Interpretation of the buffer's octets can be abstracted out by
+using the slightly higher-level functions L</lex_peek_unichar> and
+L</lex_read_unichar>.
+
+=for apidoc AmxU|char *|PL_parser-E<gt>linestart
+
+Points to the start of the current line inside the lexer buffer.
+This is useful for indicating at which column an error occurred, and
+not much else. This must be updated by any lexing code that consumes
+a newline; the function L</lex_read_to> handles this detail.
+
+=cut
+*/
+
+/*
+=for apidoc Amx|bool|lex_bufutf8
+
+Indicates whether the octets in the lexer buffer
+(L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
+of Unicode characters. If not, they should be interpreted as Latin-1
+characters. This is analogous to the C<SvUTF8> flag for scalars.
+
+In UTF-8 mode, it is not guaranteed that the lexer buffer actually
+contains valid UTF-8. Lexing code must be robust in the face of invalid
+encoding.
+
+The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
+is significant, but not the whole story regarding the input character
+encoding. Normally, when a file is being read, the scalar contains octets
+and its C<SvUTF8> flag is off, but the octets should be interpreted as
+UTF-8 if the C<use utf8> pragma is in effect. During a string eval,
+however, the scalar may have the C<SvUTF8> flag on, and in this case its
+octets should be interpreted as UTF-8 unless the C<use bytes> pragma
+is in effect. This logic may change in the future; use this function
+instead of implementing the logic yourself.
+
+=cut
+*/
+
+bool
+Perl_lex_bufutf8(pTHX)
+{
+ return UTF;
+}
+
+/*
+=for apidoc Amx|char *|lex_grow_linestr|STRLEN len
+
+Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
+at least I<len> octets (including terminating NUL). Returns a
+pointer to the reallocated buffer. This is necessary before making
+any direct modification of the buffer that would increase its length.
+L</lex_stuff_pvn> provides a more convenient way to insert text into
+the buffer.
+
+Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
+this function updates all of the lexer's variables that point directly
+into the buffer.
+
+=cut
+*/
+
+char *
+Perl_lex_grow_linestr(pTHX_ STRLEN len)
+{
+ SV *linestr;
+ char *buf;
+ STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
+ STRLEN linestart_pos, last_uni_pos, last_lop_pos;
+ linestr = PL_parser->linestr;
+ buf = SvPVX(linestr);
+ if (len <= SvLEN(linestr))
+ return buf;
+ bufend_pos = PL_parser->bufend - buf;
+ bufptr_pos = PL_parser->bufptr - buf;
+ oldbufptr_pos = PL_parser->oldbufptr - buf;
+ oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
+ linestart_pos = PL_parser->linestart - buf;
+ last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
+ last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
+ buf = sv_grow(linestr, len);
+ PL_parser->bufend = buf + bufend_pos;
+ PL_parser->bufptr = buf + bufptr_pos;
+ PL_parser->oldbufptr = buf + oldbufptr_pos;
+ PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
+ PL_parser->linestart = buf + linestart_pos;
+ if (PL_parser->last_uni)
+ PL_parser->last_uni = buf + last_uni_pos;
+ if (PL_parser->last_lop)
+ PL_parser->last_lop = buf + last_lop_pos;
+ return buf;
+}
+
+/*
+=for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
+
+Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
+immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
+reallocating the buffer if necessary. This means that lexing code that
+runs later will see the characters as if they had appeared in the input.
+It is not recommended to do this as part of normal parsing, and most
+uses of this facility run the risk of the inserted characters being
+interpreted in an unintended manner.
+
+The string to be inserted is represented by I<len> octets starting
+at I<pv>. These octets are interpreted as either UTF-8 or Latin-1,
+according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
+The characters are recoded for the lexer buffer, according to how the
+buffer is currently being interpreted (L</lex_bufutf8>). If a string
+to be interpreted is available as a Perl scalar, the L</lex_stuff_sv>
+function is more convenient.
+
+=cut
+*/
+
+void
+Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
+{
+ dVAR;
+ char *bufptr;
+ PERL_ARGS_ASSERT_LEX_STUFF_PVN;
+ if (flags & ~(LEX_STUFF_UTF8))
+ Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
+ if (UTF) {
+ if (flags & LEX_STUFF_UTF8) {
+ goto plain_copy;
+ } else {
+ STRLEN highhalf = 0;
+ const char *p, *e = pv+len;
+ for (p = pv; p != e; p++)
+ highhalf += !!(((U8)*p) & 0x80);
+ if (!highhalf)
+ goto plain_copy;
+ lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
+ bufptr = PL_parser->bufptr;
+ Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
+ SvCUR_set(PL_parser->linestr,
+ SvCUR(PL_parser->linestr) + len+highhalf);
+ PL_parser->bufend += len+highhalf;
+ for (p = pv; p != e; p++) {
+ U8 c = (U8)*p;
+ if (c & 0x80) {
+ *bufptr++ = (char)(0xc0 | (c >> 6));
+ *bufptr++ = (char)(0x80 | (c & 0x3f));
+ } else {
+ *bufptr++ = (char)c;
+ }
+ }
+ }
+ } else {
+ if (flags & LEX_STUFF_UTF8) {
+ STRLEN highhalf = 0;
+ const char *p, *e = pv+len;
+ for (p = pv; p != e; p++) {
+ U8 c = (U8)*p;
+ if (c >= 0xc4) {
+ Perl_croak(aTHX_ "Lexing code attempted to stuff "
+ "non-Latin-1 character into Latin-1 input");
+ } else if (c >= 0xc2 && p+1 != e &&
+ (((U8)p[1]) & 0xc0) == 0x80) {
+ p++;
+ highhalf++;
+ } else if (c >= 0x80) {
+ /* malformed UTF-8 */
+ ENTER;
+ SAVESPTR(PL_warnhook);
+ PL_warnhook = PERL_WARNHOOK_FATAL;
+ utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
+ LEAVE;
+ }
+ }
+ if (!highhalf)
+ goto plain_copy;
+ lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
+ bufptr = PL_parser->bufptr;
+ Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
+ SvCUR_set(PL_parser->linestr,
+ SvCUR(PL_parser->linestr) + len-highhalf);
+ PL_parser->bufend += len-highhalf;
+ for (p = pv; p != e; p++) {
+ U8 c = (U8)*p;
+ if (c & 0x80) {
+ *bufptr++ = (char)(((c & 0x3) << 6) | (p[1] & 0x3f));
+ p++;
+ } else {
+ *bufptr++ = (char)c;
+ }
+ }
+ } else {
+ plain_copy:
+ lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
+ bufptr = PL_parser->bufptr;
+ Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
+ SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
+ PL_parser->bufend += len;
+ Copy(pv, bufptr, len, char);
+ }
+ }
+}
+
+/*
+=for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
+
+Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
+immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
+reallocating the buffer if necessary. This means that lexing code that
+runs later will see the characters as if they had appeared in the input.
+It is not recommended to do this as part of normal parsing, and most
+uses of this facility run the risk of the inserted characters being
+interpreted in an unintended manner.
+
+The string to be inserted is the string value of I<sv>. The characters
+are recoded for the lexer buffer, according to how the buffer is currently
+being interpreted (L</lex_bufutf8>). If a string to be interpreted is
+not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
+need to construct a scalar.
+
+=cut
+*/
+
+void
+Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
+{
+ char *pv;
+ STRLEN len;
+ PERL_ARGS_ASSERT_LEX_STUFF_SV;
+ if (flags)
+ Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
+ pv = SvPV(sv, len);
+ lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
+}
+
+/*
+=for apidoc Amx|void|lex_unstuff|char *ptr
+
+Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
+I<ptr>. Text following I<ptr> will be moved, and the buffer shortened.
+This hides the discarded text from any lexing code that runs later,
+as if the text had never appeared.
+
+This is not the normal way to consume lexed text. For that, use
+L</lex_read_to>.
+
+=cut
+*/
+
+void
+Perl_lex_unstuff(pTHX_ char *ptr)
+{
+ char *buf, *bufend;
+ STRLEN unstuff_len;
+ PERL_ARGS_ASSERT_LEX_UNSTUFF;
+ buf = PL_parser->bufptr;
+ if (ptr < buf)
+ Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
+ if (ptr == buf)
+ return;
+ bufend = PL_parser->bufend;
+ if (ptr > bufend)
+ Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
+ unstuff_len = ptr - buf;
+ Move(ptr, buf, bufend+1-ptr, char);
+ SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
+ PL_parser->bufend = bufend - unstuff_len;
+}
+
+/*
+=for apidoc Amx|void|lex_read_to|char *ptr
+
+Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
+to I<ptr>. This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
+performing the correct bookkeeping whenever a newline character is passed.
+This is the normal way to consume lexed text.
+
+Interpretation of the buffer's octets can be abstracted out by
+using the slightly higher-level functions L</lex_peek_unichar> and
+L</lex_read_unichar>.
+
+=cut
+*/
+
+void
+Perl_lex_read_to(pTHX_ char *ptr)
+{
+ char *s;
+ PERL_ARGS_ASSERT_LEX_READ_TO;
+ s = PL_parser->bufptr;
+ if (ptr < s || ptr > PL_parser->bufend)
+ Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
+ for (; s != ptr; s++)
+ if (*s == '\n') {
+ CopLINE_inc(PL_curcop);
+ PL_parser->linestart = s+1;
+ }
+ PL_parser->bufptr = ptr;
+}
+
+/*
+=for apidoc Amx|void|lex_discard_to|char *ptr
+
+Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
+up to I<ptr>. The remaining content of the buffer will be moved, and
+all pointers into the buffer updated appropriately. I<ptr> must not
+be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
+it is not permitted to discard text that has yet to be lexed.
+
+Normally it is not necessarily to do this directly, because it suffices to
+use the implicit discarding behaviour of L</lex_next_chunk> and things
+based on it. However, if a token stretches across multiple lines,
+and the lexing code has kept multiple lines of text in the buffer for
+that purpose, then after completion of the token it would be wise to
+explicitly discard the now-unneeded earlier lines, to avoid future
+multi-line tokens growing the buffer without bound.
+
+=cut
+*/
+
+void
+Perl_lex_discard_to(pTHX_ char *ptr)
+{
+ char *buf;
+ STRLEN discard_len;
+ PERL_ARGS_ASSERT_LEX_DISCARD_TO;
+ buf = SvPVX(PL_parser->linestr);
+ if (ptr < buf)
+ Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
+ if (ptr == buf)
+ return;
+ if (ptr > PL_parser->bufptr)
+ Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
+ discard_len = ptr - buf;
+ if (PL_parser->oldbufptr < ptr)
+ PL_parser->oldbufptr = ptr;
+ if (PL_parser->oldoldbufptr < ptr)
+ PL_parser->oldoldbufptr = ptr;
+ if (PL_parser->last_uni && PL_parser->last_uni < ptr)
+ PL_parser->last_uni = NULL;
+ if (PL_parser->last_lop && PL_parser->last_lop < ptr)
+ PL_parser->last_lop = NULL;
+ Move(ptr, buf, PL_parser->bufend+1-ptr, char);
+ SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
+ PL_parser->bufend -= discard_len;
+ PL_parser->bufptr -= discard_len;
+ PL_parser->oldbufptr -= discard_len;
+ PL_parser->oldoldbufptr -= discard_len;
+ if (PL_parser->last_uni)
+ PL_parser->last_uni -= discard_len;
+ if (PL_parser->last_lop)
+ PL_parser->last_lop -= discard_len;
+}
+
+/*
+=for apidoc Amx|bool|lex_next_chunk|U32 flags
+
+Reads in the next chunk of text to be lexed, appending it to
+L</PL_parser-E<gt>linestr>. This should be called when lexing code has
+looked to the end of the current chunk and wants to know more. It is
+usual, but not necessary, for lexing to have consumed the entirety of
+the current chunk at this time.
+
+If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
+chunk (i.e., the current chunk has been entirely consumed), normally the
+current chunk will be discarded at the same time that the new chunk is
+read in. If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
+will not be discarded. If the current chunk has not been entirely
+consumed, then it will not be discarded regardless of the flag.
+
+Returns true if some new text was added to the buffer, or false if the
+buffer has reached the end of the input text.
+
+=cut
+*/
+
+#define LEX_FAKE_EOF 0x80000000
+
+bool
+Perl_lex_next_chunk(pTHX_ U32 flags)
+{
+ SV *linestr;
+ char *buf;
+ STRLEN old_bufend_pos, new_bufend_pos;
+ STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
+ STRLEN linestart_pos, last_uni_pos, last_lop_pos;
+ bool got_some_for_debugger = 0;
+ bool got_some;
+ if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF))
+ Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
+ linestr = PL_parser->linestr;
+ buf = SvPVX(linestr);
+ if (!(flags & LEX_KEEP_PREVIOUS) &&
+ PL_parser->bufptr == PL_parser->bufend) {
+ old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
+ linestart_pos = 0;
+ if (PL_parser->last_uni != PL_parser->bufend)
+ PL_parser->last_uni = NULL;
+ if (PL_parser->last_lop != PL_parser->bufend)
+ PL_parser->last_lop = NULL;
+ last_uni_pos = last_lop_pos = 0;
+ *buf = 0;
+ SvCUR(linestr) = 0;
+ } else {
+ old_bufend_pos = PL_parser->bufend - buf;
+ bufptr_pos = PL_parser->bufptr - buf;
+ oldbufptr_pos = PL_parser->oldbufptr - buf;
+ oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
+ linestart_pos = PL_parser->linestart - buf;
+ last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
+ last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
+ }
+ if (flags & LEX_FAKE_EOF) {
+ goto eof;
+ } else if (!PL_parser->rsfp) {
+ got_some = 0;
+ } else if (filter_gets(linestr, old_bufend_pos)) {
+ got_some = 1;
+ got_some_for_debugger = 1;
+ } else {
+ if (!SvPOK(linestr)) /* can get undefined by filter_gets */
+ sv_setpvs(linestr, "");
+ eof:
+ /* End of real input. Close filehandle (unless it was STDIN),
+ * then add implicit termination.
+ */
+ if ((PerlIO*)PL_parser->rsfp == PerlIO_stdin())
+ PerlIO_clearerr(PL_parser->rsfp);
+ else if (PL_parser->rsfp)
+ (void)PerlIO_close(PL_parser->rsfp);
+ PL_parser->rsfp = NULL;
+ PL_doextract = FALSE;
+#ifdef PERL_MAD
+ if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
+ PL_faketokens = 1;
+#endif
+ if (!PL_in_eval && PL_minus_p) {
+ sv_catpvs(linestr,
+ /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
+ PL_minus_n = PL_minus_p = 0;
+ } else if (!PL_in_eval && PL_minus_n) {
+ sv_catpvs(linestr, /*{*/";}");
+ PL_minus_n = 0;
+ } else
+ sv_catpvs(linestr, ";");
+ got_some = 1;
+ }
+ buf = SvPVX(linestr);
+ new_bufend_pos = SvCUR(linestr);
+ PL_parser->bufend = buf + new_bufend_pos;
+ PL_parser->bufptr = buf + bufptr_pos;
+ PL_parser->oldbufptr = buf + oldbufptr_pos;
+ PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
+ PL_parser->linestart = buf + linestart_pos;
+ if (PL_parser->last_uni)
+ PL_parser->last_uni = buf + last_uni_pos;
+ if (PL_parser->last_lop)
+ PL_parser->last_lop = buf + last_lop_pos;
+ if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
+ PL_curstash != PL_debstash) {
+ /* debugger active and we're not compiling the debugger code,
+ * so store the line into the debugger's array of lines
+ */
+ update_debugger_info(NULL, buf+old_bufend_pos,
+ new_bufend_pos-old_bufend_pos);
+ }
+ return got_some;
+}
+
+/*
+=for apidoc Amx|I32|lex_peek_unichar|U32 flags
+
+Looks ahead one (Unicode) character in the text currently being lexed.
+Returns the codepoint (unsigned integer value) of the next character,
+or -1 if lexing has reached the end of the input text. To consume the
+peeked character, use L</lex_read_unichar>.
+
+If the next character is in (or extends into) the next chunk of input
+text, the next chunk will be read in. Normally the current chunk will be
+discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
+then the current chunk will not be discarded.
+
+If the input is being interpreted as UTF-8 and a UTF-8 encoding error
+is encountered, an exception is generated.
+
+=cut
+*/
+
+I32
+Perl_lex_peek_unichar(pTHX_ U32 flags)
+{
+ dVAR;
+ char *s, *bufend;
+ if (flags & ~(LEX_KEEP_PREVIOUS))
+ Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
+ s = PL_parser->bufptr;
+ bufend = PL_parser->bufend;
+ if (UTF) {
+ U8 head;
+ I32 unichar;
+ STRLEN len, retlen;
+ if (s == bufend) {
+ if (!lex_next_chunk(flags))
+ return -1;
+ s = PL_parser->bufptr;
+ bufend = PL_parser->bufend;
+ }
+ head = (U8)*s;
+ if (!(head & 0x80))
+ return head;
+ if (head & 0x40) {
+ len = PL_utf8skip[head];
+ while ((STRLEN)(bufend-s) < len) {
+ if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
+ break;
+ s = PL_parser->bufptr;
+ bufend = PL_parser->bufend;
+ }
+ }
+ unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
+ if (retlen == (STRLEN)-1) {
+ /* malformed UTF-8 */
+ ENTER;
+ SAVESPTR(PL_warnhook);
+ PL_warnhook = PERL_WARNHOOK_FATAL;
+ utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
+ LEAVE;
+ }
+ return unichar;
+ } else {
+ if (s == bufend) {
+ if (!lex_next_chunk(flags))
+ return -1;
+ s = PL_parser->bufptr;
+ }
+ return (U8)*s;
+ }
+}
+
+/*
+=for apidoc Amx|I32|lex_read_unichar|U32 flags
+
+Reads the next (Unicode) character in the text currently being lexed.
+Returns the codepoint (unsigned integer value) of the character read,
+and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
+if lexing has reached the end of the input text. To non-destructively
+examine the next character, use L</lex_peek_unichar> instead.
+
+If the next character is in (or extends into) the next chunk of input
+text, the next chunk will be read in. Normally the current chunk will be
+discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
+then the current chunk will not be discarded.
+
+If the input is being interpreted as UTF-8 and a UTF-8 encoding error
+is encountered, an exception is generated.
+
+=cut
+*/
+
+I32
+Perl_lex_read_unichar(pTHX_ U32 flags)
+{
+ I32 c;
+ if (flags & ~(LEX_KEEP_PREVIOUS))
+ Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
+ c = lex_peek_unichar(flags);
+ if (c != -1) {
+ if (c == '\n')
+ CopLINE_inc(PL_curcop);
+ PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
+ }
+ return c;
+}
+
+/*
+=for apidoc Amx|void|lex_read_space|U32 flags
+
+Reads optional spaces, in Perl style, in the text currently being
+lexed. The spaces may include ordinary whitespace characters and
+Perl-style comments. C<#line> directives are processed if encountered.
+L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
+at a non-space character (or the end of the input text).
+
+If spaces extend into the next chunk of input text, the next chunk will
+be read in. Normally the current chunk will be discarded at the same
+time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
+chunk will not be discarded.
+
+=cut
+*/
+
+#define LEX_NO_NEXT_CHUNK 0x80000000
+
+void
+Perl_lex_read_space(pTHX_ U32 flags)
+{
+ char *s, *bufend;
+ bool need_incline = 0;
+ if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK))
+ Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
+#ifdef PERL_MAD
+ if (PL_skipwhite) {
+ sv_free(PL_skipwhite);
+ PL_skipwhite = NULL;
+ }
+ if (PL_madskills)
+ PL_skipwhite = newSVpvs("");
+#endif /* PERL_MAD */
+ s = PL_parser->bufptr;
+ bufend = PL_parser->bufend;
+ while (1) {
+ char c = *s;
+ if (c == '#') {
+ do {
+ c = *++s;
+ } while (!(c == '\n' || (c == 0 && s == bufend)));
+ } else if (c == '\n') {
+ s++;
+ PL_parser->linestart = s;
+ if (s == bufend)
+ need_incline = 1;
+ else
+ incline(s);
+ } else if (isSPACE(c)) {
+ s++;
+ } else if (c == 0 && s == bufend) {
+ bool got_more;
+#ifdef PERL_MAD
+ if (PL_madskills)
+ sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
+#endif /* PERL_MAD */
+ if (flags & LEX_NO_NEXT_CHUNK)
+ break;
+ PL_parser->bufptr = s;
+ CopLINE_inc(PL_curcop);
+ got_more = lex_next_chunk(flags);
+ CopLINE_dec(PL_curcop);
+ s = PL_parser->bufptr;
+ bufend = PL_parser->bufend;
+ if (!got_more)
+ break;
+ if (need_incline && PL_parser->rsfp) {
+ incline(s);
+ need_incline = 0;
+ }
+ } else {
+ break;
+ }
+ }
+#ifdef PERL_MAD
+ if (PL_madskills)
+ sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
+#endif /* PERL_MAD */
+ PL_parser->bufptr = s;
+}
+
+/*
* S_incline
* This subroutine has nothing to do with tilting, whether at windmills
* or pinball tables. Its name is short for "increment line". It
const char *n;
const char *e;
+ PERL_ARGS_ASSERT_INCLINE;
+
CopLINE_inc(PL_curcop);
if (*s++ != '#')
return;
n = s;
while (isDIGIT(*s))
s++;
+ if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
+ return;
while (SPACE_OR_TAB(*s))
s++;
if (*s == '"' && (t = strchr(s+1, '"'))) {
if (t - s > 0) {
const STRLEN len = t - s;
#ifndef USE_ITHREADS
- const char * const cf = CopFILE(PL_curcop);
- STRLEN tmplen = cf ? strlen(cf) : 0;
+ SV *const temp_sv = CopFILESV(PL_curcop);
+ const char *cf;
+ STRLEN tmplen;
+
+ if (temp_sv) {
+ cf = SvPVX(temp_sv);
+ tmplen = SvCUR(temp_sv);
+ } else {
+ cf = NULL;
+ tmplen = 0;
+ }
+
if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
/* must copy *{"::_<(eval N)[oldfilename:L]"}
* to *{"::_<newfilename"} */
gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
/* adjust ${"::_<newfilename"} to store the new file name */
GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
- GvHV(gv2) = (HV*)SvREFCNT_inc(GvHV(*gvp));
- GvAV(gv2) = (AV*)SvREFCNT_inc(GvAV(*gvp));
+ GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
+ GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
}
if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
STATIC char *
S_skipspace0(pTHX_ register char *s)
{
+ PERL_ARGS_ASSERT_SKIPSPACE0;
+
s = skipspace(s);
if (!PL_madskills)
return s;
const char *start = s;
I32 startoff = start - SvPVX(PL_linestr);
+ PERL_ARGS_ASSERT_SKIPSPACE1;
+
s = skipspace(s);
if (!PL_madskills)
return s;
const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
const I32 startoff = s - SvPVX(PL_linestr);
+ PERL_ARGS_ASSERT_SKIPSPACE2;
+
s = skipspace(s);
PL_bufptr = SvPVX(PL_linestr) + bufptroff;
if (!PL_madskills || !svp)
#endif
STATIC void
-S_update_debugger_info(pTHX_ SV *orig_sv, const char *buf, STRLEN len)
+S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
{
AV *av = CopFILEAVx(PL_curcop);
if (av) {
STATIC char *
S_skipspace(pTHX_ register char *s)
{
- dVAR;
#ifdef PERL_MAD
- int curoff;
- int startoff = s - SvPVX(PL_linestr);
-
+ char *start = s;
+#endif /* PERL_MAD */
+ PERL_ARGS_ASSERT_SKIPSPACE;
+#ifdef PERL_MAD
if (PL_skipwhite) {
sv_free(PL_skipwhite);
- PL_skipwhite = 0;
+ PL_skipwhite = NULL;
}
-#endif
-
+#endif /* PERL_MAD */
if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
while (s < PL_bufend && SPACE_OR_TAB(*s))
s++;
-#ifdef PERL_MAD
- goto done;
-#else
- return s;
-#endif
- }
- for (;;) {
- STRLEN prevlen;
- SSize_t oldprevlen, oldoldprevlen;
- SSize_t oldloplen = 0, oldunilen = 0;
- while (s < PL_bufend && isSPACE(*s)) {
- if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
- incline(s);
- }
-
- /* comment */
- if (s < PL_bufend && *s == '#') {
- while (s < PL_bufend && *s != '\n')
- s++;
- if (s < PL_bufend) {
- s++;
- if (PL_in_eval && !PL_rsfp) {
- incline(s);
- continue;
- }
- }
- }
-
- /* only continue to recharge the buffer if we're at the end
- * of the buffer, we're not reading from a source filter, and
- * we're in normal lexing mode
- */
- if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
- PL_lex_state == LEX_FORMLINE)
-#ifdef PERL_MAD
- goto done;
-#else
- return s;
-#endif
-
- /* try to recharge the buffer */
-#ifdef PERL_MAD
- curoff = s - SvPVX(PL_linestr);
-#endif
-
- if ((s = filter_gets(PL_linestr, PL_rsfp,
- (prevlen = SvCUR(PL_linestr)))) == NULL)
- {
-#ifdef PERL_MAD
- if (PL_madskills && curoff != startoff) {
- if (!PL_skipwhite)
- PL_skipwhite = newSVpvs("");
- sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
- curoff - startoff);
- }
-
- /* mustn't throw out old stuff yet if madpropping */
- SvCUR(PL_linestr) = curoff;
- s = SvPVX(PL_linestr) + curoff;
- *s = 0;
- if (curoff && s[-1] == '\n')
- s[-1] = ' ';
-#endif
-
- /* end of file. Add on the -p or -n magic */
- /* XXX these shouldn't really be added here, can't set PL_faketokens */
- if (PL_minus_p) {
-#ifdef PERL_MAD
- sv_catpvs(PL_linestr,
- ";}continue{print or die qq(-p destination: $!\\n);}");
-#else
- sv_setpvs(PL_linestr,
- ";}continue{print or die qq(-p destination: $!\\n);}");
-#endif
- PL_minus_n = PL_minus_p = 0;
- }
- else if (PL_minus_n) {
-#ifdef PERL_MAD
- sv_catpvn(PL_linestr, ";}", 2);
-#else
- sv_setpvn(PL_linestr, ";}", 2);
-#endif
- PL_minus_n = 0;
- }
- else
-#ifdef PERL_MAD
- sv_catpvn(PL_linestr,";", 1);
-#else
- sv_setpvn(PL_linestr,";", 1);
-#endif
-
- /* reset variables for next time we lex */
- PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
- = SvPVX(PL_linestr)
-#ifdef PERL_MAD
- + curoff
-#endif
- ;
- PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
- PL_last_lop = PL_last_uni = NULL;
-
- /* Close the filehandle. Could be from -P preprocessor,
- * STDIN, or a regular file. If we were reading code from
- * STDIN (because the commandline held no -e or filename)
- * then we don't close it, we reset it so the code can
- * read from STDIN too.
- */
-
- if (PL_preprocess && !PL_in_eval)
- (void)PerlProc_pclose(PL_rsfp);
- else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
- PerlIO_clearerr(PL_rsfp);
- else
- (void)PerlIO_close(PL_rsfp);
- PL_rsfp = NULL;
- return s;
- }
-
- /* not at end of file, so we only read another line */
- /* make corresponding updates to old pointers, for yyerror() */
- oldprevlen = PL_oldbufptr - PL_bufend;
- oldoldprevlen = PL_oldoldbufptr - PL_bufend;
- if (PL_last_uni)
- oldunilen = PL_last_uni - PL_bufend;
- if (PL_last_lop)
- oldloplen = PL_last_lop - PL_bufend;
- PL_linestart = PL_bufptr = s + prevlen;
- PL_bufend = s + SvCUR(PL_linestr);
+ } else {
+ STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
+ PL_bufptr = s;
+ lex_read_space(LEX_KEEP_PREVIOUS |
+ (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
+ LEX_NO_NEXT_CHUNK : 0));
s = PL_bufptr;
- PL_oldbufptr = s + oldprevlen;
- PL_oldoldbufptr = s + oldoldprevlen;
- if (PL_last_uni)
- PL_last_uni = s + oldunilen;
- if (PL_last_lop)
- PL_last_lop = s + oldloplen;
- incline(s);
-
- /* debugger active and we're not compiling the debugger code,
- * so store the line into the debugger's array of lines
- */
- if (PERLDB_LINE && PL_curstash != PL_debstash)
- update_debugger_info(NULL, PL_bufptr, PL_bufend - PL_bufptr);
+ PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
+ if (PL_linestart > PL_bufptr)
+ PL_bufptr = PL_linestart;
+ return s;
}
-
#ifdef PERL_MAD
- done:
- if (PL_madskills) {
- if (!PL_skipwhite)
- PL_skipwhite = newSVpvs("");
- curoff = s - SvPVX(PL_linestr);
- if (curoff - startoff)
- sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
- curoff - startoff);
- }
+ if (PL_madskills)
+ PL_skipwhite = newSVpvn(start, s-start);
+#endif /* PERL_MAD */
return s;
-#endif
}
/*
if ((t = strchr(s, '(')) && t < PL_bufptr)
return;
- if (ckWARN_d(WARN_AMBIGUOUS)){
- Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
- "Warning: Use of \"%.*s\" without parentheses is ambiguous",
- (int)(s - PL_last_uni), PL_last_uni);
- }
+ Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
+ "Warning: Use of \"%.*s\" without parentheses is ambiguous",
+ (int)(s - PL_last_uni), PL_last_uni);
}
/*
S_lop(pTHX_ I32 f, int x, char *s)
{
dVAR;
- yylval.ival = f;
+
+ PERL_ARGS_ASSERT_LOP;
+
+ pl_yylval.ival = f;
CLINE;
PL_expect = x;
PL_bufptr = s;
where = &PL_nexttoke[PL_curforce].next_mad;
if (PL_faketokens)
- sv_setpvn(sv, "", 0);
+ sv_setpvs(sv, "");
else {
if (!IN_BYTES) {
if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
/* keep a slot open for the head of the list? */
if (slot != '_' && *where && (*where)->mad_key == '^') {
(*where)->mad_key = slot;
- sv_free((*where)->mad_val);
+ sv_free(MUTABLE_SV(((*where)->mad_val)));
(*where)->mad_val = (void*)sv;
}
else
S_force_next(pTHX_ I32 type)
{
dVAR;
+#ifdef DEBUGGING
+ if (DEBUG_T_TEST) {
+ PerlIO_printf(Perl_debug_log, "### forced token:\n");
+ tokereport(type, &NEXTVAL_NEXTTOKE);
+ }
+#endif
#ifdef PERL_MAD
if (PL_curforce < 0)
start_force(PL_lasttoke);
}
STATIC SV *
-S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
+S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
{
dVAR;
- SV * const sv = newSVpvn(start,len);
- if (UTF && !IN_BYTES && is_utf8_string((const U8*)start, len))
- SvUTF8_on(sv);
+ SV * const sv = newSVpvn_utf8(start, len,
+ !IN_BYTES
+ && UTF
+ && !is_ascii_string((const U8*)start, len)
+ && is_utf8_string((const U8*)start, len));
return sv;
}
register char *s;
STRLEN len;
+ PERL_ARGS_ASSERT_FORCE_WORD;
+
start = SKIPSPACE1(start);
s = start;
if (isIDFIRST_lazy_if(s,UTF) ||
PL_expect = XOPERATOR;
}
}
+ if (PL_madskills)
+ curmad('g', newSVpvs( "forced" ));
NEXTVAL_NEXTTOKE.opval
= (OP*)newSVOP(OP_CONST,0,
S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
S_force_ident(pTHX_ register const char *s, int kind)
{
dVAR;
+
+ PERL_ARGS_ASSERT_FORCE_IDENT;
+
if (*s) {
const STRLEN len = strlen(s);
OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
const char *start = SvPV_const(sv,len);
const char * const end = start + len;
const bool utf = SvUTF8(sv) ? TRUE : FALSE;
+
+ PERL_ARGS_ASSERT_STR_TO_VERSION;
+
while (start < end) {
STRLEN skip;
UV n;
I32 startoff = s - SvPVX(PL_linestr);
#endif
+ PERL_ARGS_ASSERT_FORCE_VERSION;
+
s = SKIPSPACE1(s);
d = s;
#endif
if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
SV *ver;
- s = scan_num(s, &yylval);
- version = yylval.opval;
+#ifdef USE_LOCALE_NUMERIC
+ char *loc = setlocale(LC_NUMERIC, "C");
+#endif
+ s = scan_num(s, &pl_yylval);
+#ifdef USE_LOCALE_NUMERIC
+ setlocale(LC_NUMERIC, loc);
+#endif
+ version = pl_yylval.opval;
ver = cSVOPx(version)->op_sv;
if (SvPOK(ver) && !SvNIOK(ver)) {
SvUPGRADE(ver, SVt_PVNV);
}
/*
+ * S_force_strict_version
+ * Forces the next token to be a version number using strict syntax rules.
+ */
+
+STATIC char *
+S_force_strict_version(pTHX_ char *s)
+{
+ dVAR;
+ OP *version = NULL;
+#ifdef PERL_MAD
+ I32 startoff = s - SvPVX(PL_linestr);
+#endif
+ const char *errstr = NULL;
+
+ PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
+
+ while (isSPACE(*s)) /* leading whitespace */
+ s++;
+
+ if (is_STRICT_VERSION(s,&errstr)) {
+ SV *ver = newSV(0);
+ s = (char *)scan_version(s, ver, 0);
+ version = newSVOP(OP_CONST, 0, ver);
+ }
+ else if ( (*s != ';' && *s != '}' ) && (s = SKIPSPACE1(s), (*s != ';' && *s !='}' ))) {
+ PL_bufptr = s;
+ if (errstr)
+ yyerror(errstr); /* version required */
+ return s;
+ }
+
+#ifdef PERL_MAD
+ if (PL_madskills && !version) {
+ sv_free(PL_nextwhite); /* let next token collect whitespace */
+ PL_nextwhite = 0;
+ s = SvPVX(PL_linestr) + startoff;
+ }
+#endif
+ /* NOTE: The parser sees the package name and the VERSION swapped */
+ start_force(PL_curforce);
+ NEXTVAL_NEXTTOKE.opval = version;
+ force_next(WORD);
+
+ return s;
+}
+
+/*
* S_tokeq
* Tokenize a quoted string passed in as an SV. It finds the next
* chunk, up to end of string or a backslash. It may make a new
STRLEN len = 0;
SV *pv = sv;
+ PERL_ARGS_ASSERT_TOKEQ;
+
if (!SvLEN(sv))
goto finish;
goto finish;
d = s;
if ( PL_hints & HINT_NEW_STRING ) {
- pv = sv_2mortal(newSVpvn(SvPVX_const(pv), len));
- if (SvUTF8(sv))
- SvUTF8_on(pv);
+ pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
}
while (s < send) {
if (*s == '\\') {
SvCUR_set(sv, d - SvPVX_const(sv));
finish:
if ( PL_hints & HINT_NEW_STRING )
- return new_constant(NULL, 0, "q", sv, pv, "q");
+ return new_constant(NULL, 0, "q", sv, pv, "q", 1);
return sv;
}
/*
* S_sublex_start
- * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
+ * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
*
* Pattern matching will set PL_lex_op to the pattern-matching op to
- * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
+ * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
*
* OP_CONST and OP_READLINE are easy--just make the new op and return.
*
S_sublex_start(pTHX)
{
dVAR;
- register const I32 op_type = yylval.ival;
+ register const I32 op_type = pl_yylval.ival;
if (op_type == OP_NULL) {
- yylval.opval = PL_lex_op;
+ pl_yylval.opval = PL_lex_op;
PL_lex_op = NULL;
return THING;
}
/* Overloaded constants, nothing fancy: Convert to SVt_PV: */
STRLEN len;
const char * const p = SvPV_const(sv, len);
- SV * const nsv = newSVpvn(p, len);
- if (SvUTF8(sv))
- SvUTF8_on(nsv);
+ SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
SvREFCNT_dec(sv);
sv = nsv;
}
- yylval.opval = (OP*)newSVOP(op_type, 0, sv);
+ pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
PL_lex_stuff = NULL;
/* Allow <FH> // "foo" */
if (op_type == OP_READLINE)
else if (op_type == OP_BACKTICK && PL_lex_op) {
/* readpipe() vas overriden */
cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
- yylval.opval = PL_lex_op;
+ pl_yylval.opval = PL_lex_op;
PL_lex_op = NULL;
PL_lex_stuff = NULL;
return THING;
}
PL_sublex_info.super_state = PL_lex_state;
- PL_sublex_info.sub_inwhat = op_type;
+ PL_sublex_info.sub_inwhat = (U16)op_type;
PL_sublex_info.sub_op = PL_lex_op;
PL_lex_state = LEX_INTERPPUSH;
PL_expect = XTERM;
if (PL_lex_op) {
- yylval.opval = PL_lex_op;
+ pl_yylval.opval = PL_lex_op;
PL_lex_op = NULL;
return PMFUNC;
}
ENTER;
PL_lex_state = PL_sublex_info.super_state;
- SAVEI32(PL_lex_dojoin);
+ SAVEBOOL(PL_lex_dojoin);
SAVEI32(PL_lex_brackets);
SAVEI32(PL_lex_casemods);
SAVEI32(PL_lex_starts);
- SAVEI32(PL_lex_state);
+ SAVEI8(PL_lex_state);
SAVEVPTR(PL_lex_inpat);
- SAVEI32(PL_lex_inwhat);
+ SAVEI16(PL_lex_inwhat);
SAVECOPLINE(PL_curcop);
SAVEPPTR(PL_bufptr);
SAVEPPTR(PL_bufend);
if (SvUTF8(PL_linestr))
SvUTF8_on(sv);
PL_expect = XOPERATOR;
- yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
+ pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
return THING;
}
PL_thiswhite = 0;
}
if (PL_thistoken)
- sv_setpvn(PL_thistoken,"",0);
+ sv_setpvs(PL_thistoken,"");
else
PL_realtokenstart = -1;
}
Returns a pointer to the character scanned up to. If this is
advanced from the start pointer supplied (i.e. if anything was
successfully parsed), will leave an OP for the substring scanned
- in yylval. Caller must intuit reason for not parsing further
+ in pl_yylval. Caller must intuit reason for not parsing further
by looking at the next characters herself.
In patterns:
backslashes:
- double-quoted style: \r and \n
- regexp special ones: \D \s
- constants: \x31
- backrefs: \1
+ constants: \N{NAME} only
case and quoting: \U \Q \E
stops on @ and $, but not for $ as tail anchor
In double-quoted strings:
backslashes:
double-quoted style: \r and \n
- constants: \x31
+ constants: \x31, etc.
deprecated backrefs: \1 (in substitution replacements)
case and quoting: \U \Q \E
stops on @ and $
check for embedded arrays
check for embedded scalars
if (backslash) {
- leave intact backslashes from leaveit (below)
deprecate \1 in substitution replacements
handle string-changing backslashes \l \U \Q \E, etc.
switch (what was escaped) {
handle \- in a transliteration (becomes a literal -)
+ if a pattern and not \N{, go treat as regular character
handle \132 (octal characters)
handle \x15 and \x{1234} (hex characters)
- handle \N{name} (named characters)
+ handle \N{name} (named characters, also \N{3,5} in a pattern)
handle \cV (control characters)
handle printf-style backslashes (\f, \r, \n, etc)
} (end switch)
+ continue
} (end if backslash)
+ handle regular character
} (end while character to read)
*/
{
dVAR;
register char *send = PL_bufend; /* end of the constant */
- SV *sv = newSV(send - start); /* sv for the constant */
+ SV *sv = newSV(send - start); /* sv for the constant. See
+ note below on sizing. */
register char *s = start; /* start of the constant */
register char *d = SvPVX(sv); /* destination for copies */
bool dorange = FALSE; /* are we in a translit range? */
bool didrange = FALSE; /* did we just finish a range? */
I32 has_utf8 = FALSE; /* Output constant is UTF8 */
- I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
+ I32 this_utf8 = UTF; /* Is the source string assumed
+ to be UTF8? But, this can
+ show as true when the source
+ isn't utf8, as for example
+ when it is entirely composed
+ of hex constants */
+
+ /* Note on sizing: The scanned constant is placed into sv, which is
+ * initialized by newSV() assuming one byte of output for every byte of
+ * input. This routine expects newSV() to allocate an extra byte for a
+ * trailing NUL, which this routine will append if it gets to the end of
+ * the input. There may be more bytes of input than output (eg., \N{LATIN
+ * CAPITAL LETTER A}), or more output than input if the constant ends up
+ * recoded to utf8, but each time a construct is found that might increase
+ * the needed size, SvGROW() is called. Its size parameter each time is
+ * based on the best guess estimate at the time, namely the length used so
+ * far, plus the length the current construct will occupy, plus room for
+ * the trailing NUL, plus one byte for every input byte still unscanned */
+
UV uv;
#ifdef EBCDIC
UV literal_endpoint = 0;
bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
#endif
+ PERL_ARGS_ASSERT_SCAN_CONST;
+
if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
/* If we are doing a trans and we know we want UTF8 set expectation */
has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
while (s < send || dorange) {
+
/* get transliterations out of the way (they're most literal) */
if (PL_lex_inwhat == OP_TRANS) {
/* expand a range A-Z to the full set of characters. AIE! */
else if (*s == '$') {
if (!PL_lex_inpat) /* not a regexp, so $ must be var */
break;
- if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
+ if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
+ if (s[1] == '\\') {
+ Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
+ "Possible unintended interpolation of $\\ in regex");
+ }
break; /* in regexp, $ might be tail anchor */
+ }
}
/* End of else if chain - OP_TRANS rejoin rest */
/* backslashes */
if (*s == '\\' && s+1 < send) {
+ char* e; /* Can be used for ending '}', etc. */
+
s++;
/* deprecate \1 in strings and substitution replacements */
if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
{
- if (ckWARN(WARN_SYNTAX))
- Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
+ Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
*--s = '$';
break;
}
--s;
break;
}
- /* skip any other backslash escapes in a pattern */
- else if (PL_lex_inpat) {
+ /* In a pattern, process \N, but skip any other backslash escapes.
+ * This is because we don't want to translate an escape sequence
+ * into a meta symbol and have the regex compiler use the meta
+ * symbol meaning, e.g. \x{2E} would be confused with a dot. But
+ * in spite of this, we do have to process \N here while the proper
+ * charnames handler is in scope. See bugs #56444 and #62056.
+ * There is a complication because \N in a pattern may also stand
+ * for 'match a non-nl', and not mean a charname, in which case its
+ * processing should be deferred to the regex compiler. To be a
+ * charname it must be followed immediately by a '{', and not look
+ * like \N followed by a curly quantifier, i.e., not something like
+ * \N{3,}. regcurly returns a boolean indicating if it is a legal
+ * quantifier */
+ else if (PL_lex_inpat
+ && (*s != 'N'
+ || s[1] != '{'
+ || regcurly(s + 1)))
+ {
*d++ = NATIVE_TO_NEED(has_utf8,'\\');
goto default_action;
}
- /* if we get here, it's either a quoted -, or a digit */
switch (*s) {
/* quoted - in transliterations */
/* FALL THROUGH */
default:
{
- if ((isALPHA(*s) || isDIGIT(*s)) &&
- ckWARN(WARN_MISC))
- Perl_warner(aTHX_ packWARN(WARN_MISC),
- "Unrecognized escape \\%c passed through",
- *s);
+ if ((isALPHA(*s) || isDIGIT(*s)))
+ Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
+ "Unrecognized escape \\%c passed through",
+ *s);
/* default action is to copy the quoted character */
goto default_action;
}
- /* \132 indicates an octal constant */
+ /* eg. \132 indicates the octal constant 0x132 */
case '0': case '1': case '2': case '3':
case '4': case '5': case '6': case '7':
{
I32 flags = 0;
STRLEN len = 3;
- uv = grok_oct(s, &len, &flags, NULL);
+ uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
s += len;
}
goto NUM_ESCAPE_INSERT;
- /* \x24 indicates a hex constant */
+ /* eg. \x24 indicates the hex constant 0x24 */
case 'x':
++s;
if (*s == '{') {
continue;
}
len = e - s;
- uv = grok_hex(s, &len, &flags, NULL);
+ uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
s = e + 1;
}
else {
{
STRLEN len = 2;
I32 flags = PERL_SCAN_DISALLOW_PREFIX;
- uv = grok_hex(s, &len, &flags, NULL);
+ uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
s += len;
}
}
NUM_ESCAPE_INSERT:
- /* Insert oct or hex escaped character.
- * There will always enough room in sv since such
- * escapes will be longer than any UTF-8 sequence
- * they can end up as. */
+ /* Insert oct or hex escaped character. There will always be
+ * enough room in sv since such escapes will be longer than any
+ * UTF-8 sequence they can end up as, except if they force us
+ * to recode the rest of the string into utf8 */
- /* We need to map to chars to ASCII before doing the tests
- to cover EBCDIC
- */
- if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
+ /* Here uv is the ordinal of the next character being added in
+ * unicode (converted from native). */
+ if (!UNI_IS_INVARIANT(uv)) {
if (!has_utf8 && uv > 255) {
- /* Might need to recode whatever we have
- * accumulated so far if it contains any
- * hibit chars.
- *
- * (Can't we keep track of that and avoid
- * this rescan? --jhi)
- */
- int hicount = 0;
- U8 *c;
- for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
- if (!NATIVE_IS_INVARIANT(*c)) {
- hicount++;
- }
- }
- if (hicount) {
- const STRLEN offset = d - SvPVX_const(sv);
- U8 *src, *dst;
- d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
- src = (U8 *)d - 1;
- dst = src+hicount;
- d += hicount;
- while (src >= (const U8 *)SvPVX_const(sv)) {
- if (!NATIVE_IS_INVARIANT(*src)) {
- const U8 ch = NATIVE_TO_ASCII(*src);
- *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
- *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
- }
- else {
- *dst-- = *src;
- }
- src--;
- }
- }
+ /* Might need to recode whatever we have accumulated so
+ * far if it contains any chars variant in utf8 or
+ * utf-ebcdic. */
+
+ SvCUR_set(sv, d - SvPVX_const(sv));
+ SvPOK_on(sv);
+ *d = '\0';
+ /* See Note on sizing above. */
+ sv_utf8_upgrade_flags_grow(sv,
+ SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
+ UNISKIP(uv) + (STRLEN)(send - s) + 1);
+ d = SvPVX(sv) + SvCUR(sv);
+ has_utf8 = TRUE;
}
- if (has_utf8 || uv > 255) {
- d = (char*)uvchr_to_utf8((U8*)d, uv);
- has_utf8 = TRUE;
+ if (has_utf8) {
+ d = (char*)uvuni_to_utf8((U8*)d, uv);
if (PL_lex_inwhat == OP_TRANS &&
PL_sublex_info.sub_op) {
PL_sublex_info.sub_op->op_private |=
}
continue;
- /* \N{LATIN SMALL LETTER A} is a named character */
case 'N':
- ++s;
- if (*s == '{') {
- char* e = strchr(s, '}');
- SV *res;
- STRLEN len;
- const char *str;
- SV *type;
-
- if (!e) {
+ /* In a non-pattern \N must be a named character, like \N{LATIN
+ * SMALL LETTER A} or \N{U+0041}. For patterns, it also can
+ * mean to match a non-newline. For non-patterns, named
+ * characters are converted to their string equivalents. In
+ * patterns, named characters are not converted to their
+ * ultimate forms for the same reasons that other escapes
+ * aren't. Instead, they are converted to the \N{U+...} form
+ * to get the value from the charnames that is in effect right
+ * now, while preserving the fact that it was a named character
+ * so that the regex compiler knows this */
+
+ /* This section of code doesn't generally use the
+ * NATIVE_TO_NEED() macro to transform the input. I (khw) did
+ * a close examination of this macro and determined it is a
+ * no-op except on utfebcdic variant characters. Every
+ * character generated by this that would normally need to be
+ * enclosed by this macro is invariant, so the macro is not
+ * needed, and would complicate use of copy(). There are other
+ * parts of this file where the macro is used inconsistently,
+ * but are saved by it being a no-op */
+
+ /* The structure of this section of code (besides checking for
+ * errors and upgrading to utf8) is:
+ * Further disambiguate between the two meanings of \N, and if
+ * not a charname, go process it elsewhere
+ * If of form \N{U+...}, pass it through if a pattern;
+ * otherwise convert to utf8
+ * Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
+ * pattern; otherwise convert to utf8 */
+
+ /* Here, s points to the 'N'; the test below is guaranteed to
+ * succeed if we are being called on a pattern as we already
+ * know from a test above that the next character is a '{'.
+ * On a non-pattern \N must mean 'named sequence, which
+ * requires braces */
+ s++;
+ if (*s != '{') {
+ yyerror("Missing braces on \\N{}");
+ continue;
+ }
+ s++;
+
+ /* If there is no matching '}', it is an error. */
+ if (! (e = strchr(s, '}'))) {
+ if (! PL_lex_inpat) {
yyerror("Missing right brace on \\N{}");
- e = s - 1;
- goto cont_scan;
- }
- if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
- /* \N{U+...} */
- I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
- PERL_SCAN_DISALLOW_PREFIX;
- s += 3;
- len = e - s;
- uv = grok_hex(s, &len, &flags, NULL);
- if ( e > s && len != (STRLEN)(e - s) ) {
- uv = 0xFFFD;
- }
- s = e + 1;
- goto NUM_ESCAPE_INSERT;
+ } else {
+ yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
}
- res = newSVpvn(s + 1, e - s - 1);
- type = newSVpvn(s - 2,e - s + 3);
- res = new_constant( NULL, 0, "charnames",
- res, NULL, SvPVX(type) );
- SvREFCNT_dec(type);
- if (has_utf8)
- sv_utf8_upgrade(res);
- str = SvPV_const(res,len);
-#ifdef EBCDIC_NEVER_MIND
- /* charnames uses pack U and that has been
- * recently changed to do the below uni->native
- * mapping, so this would be redundant (and wrong,
- * the code point would be doubly converted).
- * But leave this in just in case the pack U change
- * gets revoked, but the semantics is still
- * desireable for charnames. --jhi */
- {
- UV uv = utf8_to_uvchr((const U8*)str, 0);
+ continue;
+ }
- if (uv < 0x100) {
- U8 tmpbuf[UTF8_MAXBYTES+1], *d;
+ /* Here it looks like a named character */
- d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
- sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
- str = SvPV_const(res, len);
- }
- }
-#endif
- if (!has_utf8 && SvUTF8(res)) {
- const char * const ostart = SvPVX_const(sv);
- SvCUR_set(sv, d - ostart);
+ if (PL_lex_inpat) {
+
+ /* XXX This block is temporary code. \N{} implies that the
+ * pattern is to have Unicode semantics, and therefore
+ * currently has to be encoded in utf8. By putting it in
+ * utf8 now, we save a whole pass in the regular expression
+ * compiler. Once that code is changed so Unicode
+ * semantics doesn't necessarily have to be in utf8, this
+ * block should be removed */
+ if (!has_utf8) {
+ SvCUR_set(sv, d - SvPVX_const(sv));
SvPOK_on(sv);
*d = '\0';
- sv_utf8_upgrade(sv);
- /* this just broke our allocation above... */
- SvGROW(sv, (STRLEN)(send - start));
+ /* See Note on sizing above. */
+ sv_utf8_upgrade_flags_grow(sv,
+ SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
+ /* 5 = '\N{' + cur char + NUL */
+ (STRLEN)(send - s) + 5);
d = SvPVX(sv) + SvCUR(sv);
has_utf8 = TRUE;
}
- if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
- const char * const odest = SvPVX_const(sv);
+ }
+
+ if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
+ I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
+ | PERL_SCAN_DISALLOW_PREFIX;
+ STRLEN len;
+
+ /* For \N{U+...}, the '...' is a unicode value even on
+ * EBCDIC machines */
+ s += 2; /* Skip to next char after the 'U+' */
+ len = e - s;
+ uv = grok_hex(s, &len, &flags, NULL);
+ if (len == 0 || len != (STRLEN)(e - s)) {
+ yyerror("Invalid hexadecimal number in \\N{U+...}");
+ s = e + 1;
+ continue;
+ }
+
+ if (PL_lex_inpat) {
+
+ /* Pass through to the regex compiler unchanged. The
+ * reason we evaluated the number above is to make sure
+ * there wasn't a syntax error. */
+ s -= 5; /* Include the '\N{U+' */
+ Copy(s, d, e - s + 1, char); /* 1 = include the } */
+ d += e - s + 1;
+ }
+ else { /* Not a pattern: convert the hex to string */
+
+ /* If destination is not in utf8, unconditionally
+ * recode it to be so. This is because \N{} implies
+ * Unicode semantics, and scalars have to be in utf8
+ * to guarantee those semantics */
+ if (! has_utf8) {
+ SvCUR_set(sv, d - SvPVX_const(sv));
+ SvPOK_on(sv);
+ *d = '\0';
+ /* See Note on sizing above. */
+ sv_utf8_upgrade_flags_grow(
+ sv,
+ SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
+ UNISKIP(uv) + (STRLEN)(send - e) + 1);
+ d = SvPVX(sv) + SvCUR(sv);
+ has_utf8 = TRUE;
+ }
+
+ /* Add the string to the output */
+ if (UNI_IS_INVARIANT(uv)) {
+ *d++ = (char) uv;
+ }
+ else d = (char*)uvuni_to_utf8((U8*)d, uv);
+ }
+ }
+ else { /* Here is \N{NAME} but not \N{U+...}. */
+
+ SV *res; /* result from charnames */
+ const char *str; /* the string in 'res' */
+ STRLEN len; /* its length */
+
+ /* Get the value for NAME */
+ res = newSVpvn(s, e - s);
+ res = new_constant( NULL, 0, "charnames",
+ /* includes all of: \N{...} */
+ res, NULL, s - 3, e - s + 4 );
+
+ /* Most likely res will be in utf8 already since the
+ * standard charnames uses pack U, but a custom translator
+ * can leave it otherwise, so make sure. XXX This can be
+ * revisited to not have charnames use utf8 for characters
+ * that don't need it when regexes don't have to be in utf8
+ * for Unicode semantics. If doing so, remember EBCDIC */
+ sv_utf8_upgrade(res);
+ str = SvPV_const(res, len);
+
+ /* Don't accept malformed input */
+ if (! is_utf8_string((U8 *) str, len)) {
+ yyerror("Malformed UTF-8 returned by \\N");
+ }
+ else if (PL_lex_inpat) {
+
+ if (! len) { /* The name resolved to an empty string */
+ Copy("\\N{}", d, 4, char);
+ d += 4;
+ }
+ else {
+ /* In order to not lose information for the regex
+ * compiler, pass the result in the specially made
+ * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
+ * the code points in hex of each character
+ * returned by charnames */
+
+ const char *str_end = str + len;
+ STRLEN char_length; /* cur char's byte length */
+ STRLEN output_length; /* and the number of bytes
+ after this is translated
+ into hex digits */
+ const STRLEN off = d - SvPVX_const(sv);
+
+ /* 2 hex per byte; 2 chars for '\N'; 2 chars for
+ * max('U+', '.'); and 1 for NUL */
+ char hex_string[2 * UTF8_MAXBYTES + 5];
+
+ /* Get the first character of the result. */
+ U32 uv = utf8n_to_uvuni((U8 *) str,
+ len,
+ &char_length,
+ UTF8_ALLOW_ANYUV);
+
+ /* The call to is_utf8_string() above hopefully
+ * guarantees that there won't be an error. But
+ * it's easy here to make sure. The function just
+ * above warns and returns 0 if invalid utf8, but
+ * it can also return 0 if the input is validly a
+ * NUL. Disambiguate */
+ if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
+ uv = UNICODE_REPLACEMENT;
+ }
+
+ /* Convert first code point to hex, including the
+ * boiler plate before it */
+ sprintf(hex_string, "\\N{U+%X", (unsigned int) uv);
+ output_length = strlen(hex_string);
+
+ /* Make sure there is enough space to hold it */
+ d = off + SvGROW(sv, off
+ + output_length
+ + (STRLEN)(send - e)
+ + 2); /* '}' + NUL */
+ /* And output it */
+ Copy(hex_string, d, output_length, char);
+ d += output_length;
+
+ /* For each subsequent character, append dot and
+ * its ordinal in hex */
+ while ((str += char_length) < str_end) {
+ const STRLEN off = d - SvPVX_const(sv);
+ U32 uv = utf8n_to_uvuni((U8 *) str,
+ str_end - str,
+ &char_length,
+ UTF8_ALLOW_ANYUV);
+ if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
+ uv = UNICODE_REPLACEMENT;
+ }
+
+ sprintf(hex_string, ".%X", (unsigned int) uv);
+ output_length = strlen(hex_string);
+
+ d = off + SvGROW(sv, off
+ + output_length
+ + (STRLEN)(send - e)
+ + 2); /* '}' + NUL */
+ Copy(hex_string, d, output_length, char);
+ d += output_length;
+ }
+
+ *d++ = '}'; /* Done. Add the trailing brace */
+ }
+ }
+ else { /* Here, not in a pattern. Convert the name to a
+ * string. */
+
+ /* If destination is not in utf8, unconditionally
+ * recode it to be so. This is because \N{} implies
+ * Unicode semantics, and scalars have to be in utf8
+ * to guarantee those semantics */
+ if (! has_utf8) {
+ SvCUR_set(sv, d - SvPVX_const(sv));
+ SvPOK_on(sv);
+ *d = '\0';
+ /* See Note on sizing above. */
+ sv_utf8_upgrade_flags_grow(sv,
+ SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
+ len + (STRLEN)(send - s) + 1);
+ d = SvPVX(sv) + SvCUR(sv);
+ has_utf8 = TRUE;
+ } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
+
+ /* See Note on sizing above. (NOTE: SvCUR() is not
+ * set correctly here). */
+ const STRLEN off = d - SvPVX_const(sv);
+ d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
+ }
+ Copy(str, d, len, char);
+ d += len;
+ }
+ SvREFCNT_dec(res);
- SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
- d = SvPVX(sv) + (d - odest);
+ /* Deprecate non-approved name syntax */
+ if (ckWARN_d(WARN_DEPRECATED)) {
+ bool problematic = FALSE;
+ char* i = s;
+
+ /* For non-ut8 input, look to see that the first
+ * character is an alpha, then loop through the rest
+ * checking that each is a continuation */
+ if (! this_utf8) {
+ if (! isALPHAU(*i)) problematic = TRUE;
+ else for (i = s + 1; i < e; i++) {
+ if (isCHARNAME_CONT(*i)) continue;
+ problematic = TRUE;
+ break;
+ }
+ }
+ else {
+ /* Similarly for utf8. For invariants can check
+ * directly. We accept anything above the latin1
+ * range because it is immaterial to Perl if it is
+ * correct or not, and is expensive to check. But
+ * it is fairly easy in the latin1 range to convert
+ * the variants into a single character and check
+ * those */
+ if (UTF8_IS_INVARIANT(*i)) {
+ if (! isALPHAU(*i)) problematic = TRUE;
+ } else if (UTF8_IS_DOWNGRADEABLE_START(*i)) {
+ if (! isALPHAU(UNI_TO_NATIVE(UTF8_ACCUMULATE(*i,
+ *(i+1)))))
+ {
+ problematic = TRUE;
+ }
+ }
+ if (! problematic) for (i = s + UTF8SKIP(s);
+ i < e;
+ i+= UTF8SKIP(i))
+ {
+ if (UTF8_IS_INVARIANT(*i)) {
+ if (isCHARNAME_CONT(*i)) continue;
+ } else if (! UTF8_IS_DOWNGRADEABLE_START(*i)) {
+ continue;
+ } else if (isCHARNAME_CONT(
+ UNI_TO_NATIVE(
+ UTF8_ACCUMULATE(*i, *(i+1)))))
+ {
+ continue;
+ }
+ problematic = TRUE;
+ break;
+ }
+ }
+ if (problematic) {
+ /* The e-i passed to the final %.*s makes sure that
+ * should the trailing NUL be missing that this
+ * print won't run off the end of the string */
+ Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
+ "Deprecated character in \\N{...}; marked by <-- HERE in \\N{%.*s<-- HERE %.*s", i - s + 1, s, e - i, i + 1);
+ }
}
+ } /* End \N{NAME} */
#ifdef EBCDIC
- if (!dorange)
- native_range = FALSE; /* \N{} is guessed to be Unicode */
+ if (!dorange)
+ native_range = FALSE; /* \N{} is defined to be Unicode */
#endif
- Copy(str, d, len, char);
- d += len;
- SvREFCNT_dec(res);
- cont_scan:
- s = e + 1;
- }
- else
- yyerror("Missing braces on \\N{}");
+ s = e + 1; /* Point to just after the '}' */
continue;
/* \c is a control character */
case 'c':
s++;
if (s < send) {
- U8 c = *s++;
-#ifdef EBCDIC
- if (isLOWER(c))
- c = toUPPER(c);
-#endif
- *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
+ *d++ = grok_bslash_c(*s++, 1);
}
else {
yyerror("Missing control char name in \\c");
#endif
default_action:
- /* If we started with encoded form, or already know we want it
- and then encode the next character */
- if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
+ /* If we started with encoded form, or already know we want it,
+ then encode the next character */
+ if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
STRLEN len = 1;
+
+
+ /* One might think that it is wasted effort in the case of the
+ * source being utf8 (this_utf8 == TRUE) to take the next character
+ * in the source, convert it to an unsigned value, and then convert
+ * it back again. But the source has not been validated here. The
+ * routine that does the conversion checks for errors like
+ * malformed utf8 */
+
const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
- s += len;
- if (need > len) {
- /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
+ if (!has_utf8) {
+ SvCUR_set(sv, d - SvPVX_const(sv));
+ SvPOK_on(sv);
+ *d = '\0';
+ /* See Note on sizing above. */
+ sv_utf8_upgrade_flags_grow(sv,
+ SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
+ need + (STRLEN)(send - s) + 1);
+ d = SvPVX(sv) + SvCUR(sv);
+ has_utf8 = TRUE;
+ } else if (need > len) {
+ /* encoded value larger than old, may need extra space (NOTE:
+ * SvCUR() is not set correctly here). See Note on sizing
+ * above. */
const STRLEN off = d - SvPVX_const(sv);
- d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
+ d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
}
+ s += len;
+
d = (char*)uvchr_to_utf8((U8*)d, nextuv);
- has_utf8 = TRUE;
#ifdef EBCDIC
if (uv > 255 && !dorange)
native_range = FALSE;
SvPV_shrink_to_cur(sv);
}
- /* return the substring (via yylval) only if we parsed anything */
+ /* return the substring (via pl_yylval) only if we parsed anything */
if (s > PL_bufptr) {
- if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
- sv = new_constant(start, s - start,
- (const char *)(PL_lex_inpat ? "qr" : "q"),
- sv, NULL,
- (const char *)
- (( PL_lex_inwhat == OP_TRANS
- ? "tr"
- : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
- ? "s"
- : "qq"))));
- yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
+ if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
+ const char *const key = PL_lex_inpat ? "qr" : "q";
+ const STRLEN keylen = PL_lex_inpat ? 2 : 1;
+ const char *type;
+ STRLEN typelen;
+
+ if (PL_lex_inwhat == OP_TRANS) {
+ type = "tr";
+ typelen = 2;
+ } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
+ type = "s";
+ typelen = 1;
+ } else {
+ type = "qq";
+ typelen = 2;
+ }
+
+ sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
+ type, typelen);
+ }
+ pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
} else
SvREFCNT_dec(sv);
return s;
S_intuit_more(pTHX_ register char *s)
{
dVAR;
+
+ PERL_ARGS_ASSERT_INTUIT_MORE;
+
if (PL_lex_brackets)
return TRUE;
if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
int soff;
#endif
+ PERL_ARGS_ASSERT_INTUIT_METHOD;
+
if (gv) {
if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
return 0;
*/
if (*start == '$') {
- if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
+ if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
+ isUPPER(*PL_tokenbuf))
return 0;
#ifdef PERL_MAD
len = start - SvPVX(PL_linestr);
bare_package:
start_force(PL_curforce);
NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
- newSVpvn(tmpbuf,len));
+ S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
if (PL_madskills)
curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
return 0;
}
-/*
- * S_incl_perldb
- * Return a string of Perl code to load the debugger. If PERL5DB
- * is set, it will return the contents of that, otherwise a
- * compile-time require of perl5db.pl.
- */
-
-STATIC const char*
-S_incl_perldb(pTHX)
-{
- dVAR;
- if (PL_perldb) {
- const char * const pdb = PerlEnv_getenv("PERL5DB");
-
- if (pdb)
- return pdb;
- SETERRNO(0,SS_NORMAL);
- return "BEGIN { require 'perl5db.pl' }";
- }
- return "";
-}
-
-
/* Encoded script support. filter_add() effectively inserts a
* 'pre-processing' function into the current source input stream.
* Note that the filter function only applies to the current source file
if (!funcp)
return NULL;
+ if (!PL_parser)
+ return NULL;
+
if (!PL_rsfp_filters)
PL_rsfp_filters = newAV();
if (!datasv)
dVAR;
SV *datasv;
+ PERL_ARGS_ASSERT_FILTER_DEL;
+
#ifdef DEBUGGING
DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
FPTR2DPTR(void*, funcp)));
#endif
- if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
+ if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
return;
/* if filter is on top of stack (usual case) just pop it off */
datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
#endif
: maxlen;
- if (!PL_rsfp_filters)
+ PERL_ARGS_ASSERT_FILTER_READ;
+
+ if (!PL_parser || !PL_rsfp_filters)
return -1;
if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
/* Provide a default input filter to make life easy. */
const int old_len = SvCUR(buf_sv);
/* ensure buf_sv is large enough */
- SvGROW(buf_sv, (STRLEN)(old_len + correct_length)) ;
+ SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
correct_length)) <= 0) {
if (PerlIO_error(PL_rsfp))
return 0 ; /* end of file */
}
SvCUR_set(buf_sv, old_len + len) ;
+ SvPVX(buf_sv)[old_len + len] = '\0';
} else {
/* Want a line */
if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
}
STATIC char *
-S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
+S_filter_gets(pTHX_ register SV *sv, STRLEN append)
{
dVAR;
+
+ PERL_ARGS_ASSERT_FILTER_GETS;
+
#ifdef PERL_CR_FILTER
if (!PL_rsfp_filters) {
filter_add(S_cr_textfilter,NULL);
return NULL ;
}
else
- return (sv_gets(sv, fp, append));
+ return (sv_gets(sv, PL_rsfp, append));
}
STATIC HV *
-S_find_in_my_stash(pTHX_ const char *pkgname, I32 len)
+S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
{
dVAR;
GV *gv;
+ PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
+
if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
return PL_curstash;
if (gv && GvCV(gv)) {
SV * const sv = cv_const_sv(GvCV(gv));
if (sv)
- pkgname = SvPV_nolen_const(sv);
+ pkgname = SvPV_const(sv, len);
}
- return gv_stashpv(pkgname, 0);
+ return gv_stashpvn(pkgname, len, 0);
}
/*
{
GV **gvp;
GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
- yylval.ival = OP_BACKTICK;
+ pl_yylval.ival = OP_BACKTICK;
if ((gv_readpipe
&& GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
||
newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
}
- else {
- set_csh();
- }
}
#ifdef PERL_MAD
case FUNC0SUB:
case UNIOPSUB:
case LSTOPSUB:
- if (yylval.opval)
- append_madprops(PL_thismad, yylval.opval, 0);
+ if (pl_yylval.opval)
+ append_madprops(PL_thismad, pl_yylval.opval, 0);
PL_thismad = 0;
return optype;
}
/* Create new token struct. Note: opvals return early above. */
- yylval.tkval = newTOKEN(optype, yylval, PL_thismad);
+ pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
PL_thismad = 0;
return optype;
}
STATIC char *
S_tokenize_use(pTHX_ int is_use, char *s) {
dVAR;
+
+ PERL_ARGS_ASSERT_TOKENIZE_USE;
+
if (PL_expect != XSTATE)
yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
is_use ? "use" : "no"));
s = SKIPSPACE1(s);
if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
s = force_version(s, TRUE);
- if (*s == ';' || (s = SKIPSPACE1(s), *s == ';')) {
+ if (*s == ';' || *s == '}'
+ || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
start_force(PL_curforce);
NEXTVAL_NEXTTOKE.opval = NULL;
force_next(WORD);
s = force_word(s,WORD,FALSE,TRUE,FALSE);
s = force_version(s, FALSE);
}
- yylval.ival = is_use;
+ pl_yylval.ival = is_use;
return s;
}
#ifdef DEBUGGING
register char *d;
STRLEN len;
bool bof = FALSE;
+ U32 fake_eof = 0;
/* orig_keyword, gvp, and gv are initialized here because
* jump to the label just_a_word_zero can bypass their
case LEX_KNOWNEXT:
#ifdef PERL_MAD
PL_lasttoke--;
- yylval = PL_nexttoke[PL_lasttoke].next_val;
+ pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
if (PL_madskills) {
PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
PL_nexttoke[PL_lasttoke].next_mad = 0;
if (PL_thismad && PL_thismad->mad_key == '_') {
- PL_thiswhite = (SV*)PL_thismad->mad_val;
+ PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
PL_thismad->mad_val = 0;
mad_free(PL_thismad);
PL_thismad = 0;
}
#else
PL_nexttoke--;
- yylval = PL_nextval[PL_nexttoke];
+ pl_yylval = PL_nextval[PL_nexttoke];
if (!PL_nexttoke) {
PL_lex_state = PL_lex_defer;
PL_expect = PL_lex_expect;
else
Perl_croak(aTHX_ "panic: yylex");
if (PL_madskills) {
- SV* const tmpsv = newSVpvs("");
- Perl_sv_catpvf(aTHX_ tmpsv, "\\%c", *s);
+ SV* const tmpsv = newSVpvs("\\ ");
+ /* replace the space with the character we want to escape
+ */
+ SvPVX(tmpsv)[1] = *s;
curmad('_', tmpsv);
}
PL_bufptr = s + 1;
if (!PL_lex_inpat)
sv = tokeq(sv);
else if ( PL_hints & HINT_NEW_RE )
- sv = new_constant(NULL, 0, "qr", sv, sv, "q");
- yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
+ sv = new_constant(NULL, 0, "qr", sv, sv, "q", 1);
+ pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
s = PL_bufend;
}
else {
if (PL_madskills) {
curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
}
- NEXTVAL_NEXTTOKE = yylval;
+ NEXTVAL_NEXTTOKE = pl_yylval;
PL_expect = XTERM;
force_next(THING);
if (PL_lex_starts++) {
default:
if (isIDFIRST_lazy_if(s,UTF))
goto keylookup;
- Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
+ {
+ unsigned char c = *s;
+ len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
+ if (len > UNRECOGNIZED_PRECEDE_COUNT) {
+ d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
+ } else {
+ d = PL_linestart;
+ }
+ *s = '\0';
+ Perl_croak(aTHX_ "Unrecognized character \\x%02X; marked by <-- HERE after %s<-- HERE near column %d", c, d, (int) len + 1);
+ }
case 4:
case 26:
goto fake_eof; /* emulate EOF on ^D or ^Z */
if (PL_madskills)
PL_faketokens = 1;
#endif
- sv_setpv(PL_linestr,incl_perldb());
- if (SvCUR(PL_linestr))
- sv_catpvs(PL_linestr,";");
- if (PL_preambleav){
- while(AvFILLp(PL_preambleav) >= 0) {
- SV *tmpsv = av_shift(PL_preambleav);
- sv_catsv(PL_linestr, tmpsv);
+ if (PL_perldb) {
+ /* Generate a string of Perl code to load the debugger.
+ * If PERL5DB is set, it will return the contents of that,
+ * otherwise a compile-time require of perl5db.pl. */
+
+ const char * const pdb = PerlEnv_getenv("PERL5DB");
+
+ if (pdb) {
+ sv_setpv(PL_linestr, pdb);
+ sv_catpvs(PL_linestr,";");
+ } else {
+ SETERRNO(0,SS_NORMAL);
+ sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
+ }
+ } else
+ sv_setpvs(PL_linestr,"");
+ if (PL_preambleav) {
+ SV **svp = AvARRAY(PL_preambleav);
+ SV **const end = svp + AvFILLp(PL_preambleav);
+ while(svp <= end) {
+ sv_catsv(PL_linestr, *svp);
+ ++svp;
sv_catpvs(PL_linestr, ";");
- sv_free(tmpsv);
}
- sv_free((SV*)PL_preambleav);
+ sv_free(MUTABLE_SV(PL_preambleav));
PL_preambleav = NULL;
}
+ if (PL_minus_E)
+ sv_catpvs(PL_linestr,
+ "use feature ':5." STRINGIFY(PERL_VERSION) "';");
if (PL_minus_n || PL_minus_p) {
- sv_catpvs(PL_linestr, "LINE: while (<>) {");
+ sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
if (PL_minus_l)
sv_catpvs(PL_linestr,"chomp;");
if (PL_minus_a) {
sv_catpvs(PL_linestr,"our @F=split(' ');");
}
}
- if (PL_minus_E)
- sv_catpvs(PL_linestr,"use feature ':5.10';");
sv_catpvs(PL_linestr, "\n");
PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
PL_last_lop = PL_last_uni = NULL;
- if (PERLDB_LINE && PL_curstash != PL_debstash)
+ if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
update_debugger_info(PL_linestr, NULL, 0);
goto retry;
}
do {
+ fake_eof = 0;
bof = PL_rsfp ? TRUE : FALSE;
- if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == NULL) {
+ if (0) {
fake_eof:
+ fake_eof = LEX_FAKE_EOF;
+ }
+ PL_bufptr = PL_bufend;
+ CopLINE_inc(PL_curcop);
+ if (!lex_next_chunk(fake_eof)) {
+ CopLINE_dec(PL_curcop);
+ s = PL_bufptr;
+ TOKEN(';'); /* not infinite loop because rsfp is NULL now */
+ }
+ CopLINE_dec(PL_curcop);
#ifdef PERL_MAD
+ if (!PL_rsfp)
PL_realtokenstart = -1;
#endif
- if (PL_rsfp) {
- if (PL_preprocess && !PL_in_eval)
- (void)PerlProc_pclose(PL_rsfp);
- else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
- PerlIO_clearerr(PL_rsfp);
- else
- (void)PerlIO_close(PL_rsfp);
- PL_rsfp = NULL;
- PL_doextract = FALSE;
- }
- if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
-#ifdef PERL_MAD
- if (PL_madskills)
- PL_faketokens = 1;
-#endif
- sv_setpv(PL_linestr,
- (const char *)
- (PL_minus_p
- ? ";}continue{print;}" : ";}"));
- PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
- PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
- PL_last_lop = PL_last_uni = NULL;
- PL_minus_n = PL_minus_p = 0;
- goto retry;
- }
- PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
- PL_last_lop = PL_last_uni = NULL;
- sv_setpvn(PL_linestr,"",0);
- TOKEN(';'); /* not infinite loop because rsfp is NULL now */
- }
+ s = PL_bufptr;
/* If it looks like the start of a BOM or raw UTF-16,
* check if it in fact is. */
- else if (bof &&
+ if (bof && PL_rsfp &&
(*s == 0 ||
*(U8*)s == 0xEF ||
*(U8*)s >= 0xFE ||
s[1] == 0)) {
-#ifdef PERLIO_IS_STDIO
-# ifdef __GNU_LIBRARY__
-# if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
-# define FTELL_FOR_PIPE_IS_BROKEN
-# endif
-# else
-# ifdef __GLIBC__
-# if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
-# define FTELL_FOR_PIPE_IS_BROKEN
-# endif
-# endif
-# endif
-#endif
-#ifdef FTELL_FOR_PIPE_IS_BROKEN
- /* This loses the possibility to detect the bof
- * situation on perl -P when the libc5 is being used.
- * Workaround? Maybe attach some extra state to PL_rsfp?
- */
- if (!PL_preprocess)
- bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
-#else
bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
-#endif
if (bof) {
PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
s = swallow_bom((U8*)s);
sv_catsv(PL_thiswhite, PL_linestr);
#endif
if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
- sv_setpvn(PL_linestr, "", 0);
+ sv_setpvs(PL_linestr, "");
PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
PL_last_lop = PL_last_uni = NULL;
PL_doextract = FALSE;
}
}
- incline(s);
+ if (PL_rsfp)
+ incline(s);
} while (PL_doextract);
PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
- if (PERLDB_LINE && PL_curstash != PL_debstash)
- update_debugger_info(PL_linestr, NULL, 0);
PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
PL_last_lop = PL_last_uni = NULL;
if (CopLINE(PL_curcop) == 1) {
*s = '#'; /* Don't try to parse shebang line */
}
#endif /* ALTERNATE_SHEBANG */
-#ifndef MACOS_TRADITIONAL
if (!d &&
*s == '#' &&
ipathend > ipath &&
while (s < PL_bufend && isSPACE(*s))
s++;
if (s < PL_bufend) {
- Newxz(newargv,PL_origargc+3,char*);
+ Newx(newargv,PL_origargc+3,char*);
newargv[1] = s;
while (s < PL_bufend && !isSPACE(*s))
s++;
PERL_FPU_POST_EXEC
Perl_croak(aTHX_ "Can't exec %s", ipath);
}
-#endif
if (d) {
while (*d && !isSPACE(*d))
d++;
const U32 oldpdb = PL_perldb;
const bool oldn = PL_minus_n;
const bool oldp = PL_minus_p;
+ const char *d1 = d;
do {
- if (*d == 'M' || *d == 'm' || *d == 'C') {
- const char * const m = d;
- while (*d && !isSPACE(*d))
- d++;
+ bool baduni = FALSE;
+ if (*d1 == 'C') {
+ const char *d2 = d1 + 1;
+ if (parse_unicode_opts((const char **)&d2)
+ != PL_unicode)
+ baduni = TRUE;
+ }
+ if (baduni || *d1 == 'M' || *d1 == 'm') {
+ const char * const m = d1;
+ while (*d1 && !isSPACE(*d1))
+ d1++;
Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
- (int)(d - m), m);
+ (int)(d1 - m), m);
}
- d = moreswitches(d);
- } while (d);
+ d1 = moreswitches(d1);
+ } while (d1);
if (PL_doswitches && !switches_done) {
int argc = PL_origargc;
char **argv = PL_origargv;
} while (argc && argv[0][0] == '-' && argv[0][1]);
init_argv_symbols(argc,argv);
}
- if ((PERLDB_LINE && !oldpdb) ||
+ if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
/* if we have already added "LINE: while (<>) {",
we must not do it again */
{
- sv_setpvn(PL_linestr, "", 0);
+ sv_setpvs(PL_linestr, "");
PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
PL_last_lop = PL_last_uni = NULL;
PL_preambled = FALSE;
- if (PERLDB_LINE)
+ if (PERLDB_LINE || PERLDB_SAVESRC)
(void)gv_fetchfile(PL_origfilename);
goto retry;
}
"\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
#endif
case ' ': case '\t': case '\f': case 013:
-#ifdef MACOS_TRADITIONAL
- case '\312':
-#endif
#ifdef PERL_MAD
PL_realtokenstart = -1;
- s = SKIPSPACE0(s);
-#else
- s++;
+ if (!PL_thiswhite)
+ PL_thiswhite = newSVpvs("");
+ sv_catpvn(PL_thiswhite, s, 1);
#endif
+ s++;
goto retry;
case '#':
case '\n':
if (!PL_thiswhite)
PL_thiswhite = newSVpvs("");
if (CopLINE(PL_curcop) == 1) {
- sv_setpvn(PL_thiswhite, "", 0);
+ sv_setpvs(PL_thiswhite, "");
PL_faketokens = 0;
}
sv_catpvn(PL_thiswhite, s, d - s);
BOop(OP_BIT_XOR);
case '[':
PL_lex_brackets++;
- /* FALL THROUGH */
+ {
+ const char tmp = *s++;
+ OPERATOR(tmp);
+ }
case '~':
if (s[1] == '~'
&& (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
if (!PL_in_my || PL_lex_state != LEX_NORMAL)
break;
PL_bufptr = s; /* update in case we back off */
+ if (*s == '=') {
+ deprecate(":= for an empty attribute list");
+ }
goto grabattrs;
case XATTRBLOCK:
PL_expect = XBLOCK;
switch (tmp) {
case KEY_or:
case KEY_and:
- case KEY_err:
case KEY_for:
+ case KEY_foreach:
case KEY_unless:
case KEY_if:
case KEY_while:
if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
sv_free(sv);
if (PL_in_my == KEY_our) {
-#ifdef USE_ITHREADS
- GvUNIQUE_on(cGVOPx_gv(yylval.opval));
-#else
- /* skip to avoid loading attributes.pm */
-#endif
deprecate(":unique");
}
else
}
else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
sv_free(sv);
- CvLOCKED_on(PL_compcv);
+ deprecate(":locked");
}
else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
sv_free(sv);
CvMETHOD_on(PL_compcv);
}
- else if (!PL_in_my && len == 9 && strnEQ(SvPVX(sv), "assertion", len)) {
- sv_free(sv);
- CvASSERTION_on(PL_compcv);
- }
/* After we've set the flags, it could be argued that
we don't need to do the attributes.pm-based setting
process, and shouldn't bother appending recognized
--PL_lex_brackets;
if (PL_lex_state == LEX_INTERPNORMAL) {
if (PL_lex_brackets == 0) {
- if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
+ if (*s == '-' && s[1] == '>')
+ PL_lex_state = LEX_INTERPENDMAYBE;
+ else if (*s != '[' && *s != '{')
PL_lex_state = LEX_INTERPEND;
}
}
}
break;
}
- yylval.ival = CopLINE(PL_curcop);
+ pl_yylval.ival = CopLINE(PL_curcop);
if (isSPACE(*s) || *s == '#')
PL_copline = NOLINE; /* invalidate current command line number */
TOKEN('{');
if (PL_madskills) {
if (!PL_thiswhite)
PL_thiswhite = newSVpvs("");
- sv_catpvn(PL_thiswhite,"}",1);
+ sv_catpvs(PL_thiswhite,"}");
}
#endif
return yylex(); /* ignore fake brackets */
&& isIDFIRST_lazy_if(s,UTF))
{
CopLINE_dec(PL_curcop);
- Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
+ Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
CopLINE_inc(PL_curcop);
}
BAop(OP_BIT_AND);
}
else
PREREF('&');
- yylval.ival = (OPpENTERSUB_AMPER<<8);
+ pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
TERM('&');
case '|':
goto leftbracket;
}
}
- yylval.ival = 0;
+ pl_yylval.ival = 0;
OPERATOR(ASSIGNOP);
case '!':
s++;
if (PL_expect == XOPERATOR) {
if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
- PL_expect = XTERM;
- deprecate_old(commaless_variable_list);
- return REPORT(','); /* grandfather non-comma-format format */
+ return deprecate_commaless_var_list();
}
}
- if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
+ if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
PL_tokenbuf[0] = '@';
s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
sizeof PL_tokenbuf - 1, FALSE);
/* This kludge not intended to be bulletproof. */
if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
- yylval.opval = newSVOP(OP_CONST, 0,
+ pl_yylval.opval = newSVOP(OP_CONST, 0,
newSViv(CopARYBASE_get(&PL_compiling)));
- yylval.opval->op_private = OPpCONST_ARYBASE;
+ pl_yylval.opval->op_private = OPpCONST_ARYBASE;
TERM(THING);
}
d = s;
{
const char tmp = *s;
- if (PL_lex_state == LEX_NORMAL)
+ if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
s = SKIPSPACE1(s);
if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
AOPERATOR(DORDOR);
}
case '?': /* may either be conditional or pattern */
- if(PL_expect == XOPERATOR) {
+ if (PL_expect == XOPERATOR) {
char tmp = *s++;
if(tmp == '?') {
- OPERATOR('?');
+ OPERATOR('?');
}
else {
tmp = *s++;
PL_expect = XSTATE;
goto rightbracket;
}
+ if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
+ s += 3;
+ OPERATOR(YADAYADA);
+ }
if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
char tmp = *s++;
if (*s == tmp) {
s++;
if (*s == tmp) {
s++;
- yylval.ival = OPf_SPECIAL;
+ pl_yylval.ival = OPf_SPECIAL;
}
else
- yylval.ival = 0;
+ pl_yylval.ival = 0;
OPERATOR(DOTDOT);
}
- if (PL_expect != XOPERATOR)
- check_uni();
Aop(OP_CONCAT);
}
/* FALL THROUGH */
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
- s = scan_num(s, &yylval);
+ s = scan_num(s, &pl_yylval);
DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
if (PL_expect == XOPERATOR)
no_op("Number",s);
DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
if (PL_expect == XOPERATOR) {
if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
- PL_expect = XTERM;
- deprecate_old(commaless_variable_list);
- return REPORT(','); /* grandfather non-comma-format format */
+ return deprecate_commaless_var_list();
}
else
no_op("String",s);
}
if (!s)
missingterm(NULL);
- yylval.ival = OP_CONST;
+ pl_yylval.ival = OP_CONST;
TERM(sublex_start());
case '"':
DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
if (PL_expect == XOPERATOR) {
if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
- PL_expect = XTERM;
- deprecate_old(commaless_variable_list);
- return REPORT(','); /* grandfather non-comma-format format */
+ return deprecate_commaless_var_list();
}
else
no_op("String",s);
}
if (!s)
missingterm(NULL);
- yylval.ival = OP_CONST;
+ pl_yylval.ival = OP_CONST;
/* FIXME. I think that this can be const if char *d is replaced by
more localised variables. */
for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
- yylval.ival = OP_STRINGIFY;
+ pl_yylval.ival = OP_STRINGIFY;
break;
}
}
case '\\':
s++;
- if (PL_lex_inwhat && isDIGIT(*s) && ckWARN(WARN_SYNTAX))
- Perl_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
- *s, *s);
+ if (PL_lex_inwhat && isDIGIT(*s))
+ Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
+ *s, *s);
if (PL_expect == XOPERATOR)
no_op("Backslash",s);
OPERATOR(REFGEN);
while (isDIGIT(*start) || *start == '_')
start++;
if (*start == '.' && isDIGIT(start[1])) {
- s = scan_num(s, &yylval);
+ s = scan_num(s, &pl_yylval);
TERM(THING);
}
/* avoid v123abc() or $h{v1}, allow C<print v10;> */
else if (!isALPHA(*start) && (PL_expect == XTERM
|| PL_expect == XREF || PL_expect == XSTATE
|| PL_expect == XTERMORDORDOR)) {
- /* XXX Use gv_fetchpvn rather than stomping on a const string */
- const char c = *start;
- GV *gv;
- *start = '\0';
- gv = gv_fetchpv(s, 0, SVt_PVCV);
- *start = c;
+ GV *const gv = gv_fetchpvn_flags(s, start - s, 0, SVt_PVCV);
if (!gv) {
- s = scan_num(s, &yylval);
+ s = scan_num(s, &pl_yylval);
TERM(THING);
}
}
case 'z': case 'Z':
keylookup: {
+ bool anydelim;
I32 tmp;
orig_keyword = 0;
s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
/* Some keywords can be followed by any delimiter, including ':' */
- tmp = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) ||
+ anydelim = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) ||
(len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
(PL_tokenbuf[0] == 'q' &&
strchr("qwxr", PL_tokenbuf[1])))));
/* x::* is just a word, unless x is "CORE" */
- if (!tmp && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
+ if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
goto just_a_word;
d = s;
while (d < PL_bufend && isSPACE(*d))
d++; /* no comments skipped here, or s### is misparsed */
- /* Is this a label? */
- if (!tmp && PL_expect == XSTATE
- && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
- s = d + 1;
- yylval.pval = CopLABEL_alloc(PL_tokenbuf);
- CLINE;
- TOKEN(LABEL);
- }
-
- /* Check for keywords */
- tmp = keyword(PL_tokenbuf, len, 0);
-
/* Is this a word before a => operator? */
if (*d == '=' && d[1] == '>') {
CLINE;
- yylval.opval
+ pl_yylval.opval
= (OP*)newSVOP(OP_CONST, 0,
S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
- yylval.opval->op_private = OPpCONST_BARE;
+ pl_yylval.opval->op_private = OPpCONST_BARE;
TERM(WORD);
}
+ /* Check for plugged-in keyword */
+ {
+ OP *o;
+ int result;
+ char *saved_bufptr = PL_bufptr;
+ PL_bufptr = s;
+ result = CALL_FPTR(PL_keyword_plugin)(aTHX_ PL_tokenbuf, len, &o);
+ s = PL_bufptr;
+ if (result == KEYWORD_PLUGIN_DECLINE) {
+ /* not a plugged-in keyword */
+ PL_bufptr = saved_bufptr;
+ } else if (result == KEYWORD_PLUGIN_STMT) {
+ pl_yylval.opval = o;
+ CLINE;
+ PL_expect = XSTATE;
+ return REPORT(PLUGSTMT);
+ } else if (result == KEYWORD_PLUGIN_EXPR) {
+ pl_yylval.opval = o;
+ CLINE;
+ PL_expect = XOPERATOR;
+ return REPORT(PLUGEXPR);
+ } else {
+ Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
+ PL_tokenbuf);
+ }
+ }
+
+ /* Check for built-in keyword */
+ tmp = keyword(PL_tokenbuf, len, 0);
+
+ /* Is this a label? */
+ if (!anydelim && PL_expect == XSTATE
+ && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
+ s = d + 1;
+ pl_yylval.pval = CopLABEL_alloc(PL_tokenbuf);
+ CLINE;
+ TOKEN(LABEL);
+ }
+
if (tmp < 0) { /* second-class keyword? */
GV *ogv = NULL; /* override (winner) */
GV *hgv = NULL; /* hidden (loser) */
}
else if (gv && !gvp
&& -tmp==KEY_lock /* XXX generalizable kludge */
- && GvCVu(gv)
- && !hv_fetchs(GvHVn(PL_incgv), "Thread.pm", FALSE))
+ && GvCVu(gv))
{
tmp = 0; /* any sub overrides "weak" keyword */
}
else { /* no override */
tmp = -tmp;
- if (tmp == KEY_dump && ckWARN(WARN_MISC)) {
- Perl_warner(aTHX_ packWARN(WARN_MISC),
- "dump() better written as CORE::dump()");
+ if (tmp == KEY_dump) {
+ Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
+ "dump() better written as CORE::dump()");
}
gv = NULL;
gvp = 0;
- if (hgv && tmp != KEY_x && tmp != KEY_CORE
- && ckWARN(WARN_AMBIGUOUS)) /* never ambiguous */
- Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
- "Ambiguous call resolved as CORE::%s(), %s",
- GvENAME(hgv), "qualify as such or use &");
+ if (hgv && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */
+ Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
+ "Ambiguous call resolved as CORE::%s(), %s",
+ GvENAME(hgv), "qualify as such or use &");
}
}
SV *sv;
int pkgname = 0;
const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
+ OP *rv2cv_op;
CV *cv;
#ifdef PERL_MAD
SV *nextPL_nextwhite = 0;
if (PL_expect == XOPERATOR) {
if (PL_bufptr == PL_linestart) {
CopLINE_dec(PL_curcop);
- Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
+ Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
CopLINE_inc(PL_curcop);
}
else
#ifdef PERL_MAD
if (PL_madskills && !PL_thistoken) {
char *start = SvPVX(PL_linestr) + PL_realtokenstart;
- PL_thistoken = newSVpv(start,s - start);
+ PL_thistoken = newSVpvn(start,s - start);
PL_realtokenstart = s - SvPVX(PL_linestr);
}
#endif
/* Presume this is going to be a bareword of some sort. */
CLINE;
- yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
- yylval.opval->op_private = OPpCONST_BARE;
+ pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
+ pl_yylval.opval->op_private = OPpCONST_BARE;
/* UTF-8 package name? */
if (UTF && !IN_BYTES &&
is_utf8_string((U8*)SvPVX_const(sv), SvCUR(sv)))
if (len)
goto safe_bareword;
- /* Do the explicit type check so that we don't need to force
- the initialisation of the symbol table to have a real GV.
- Beware - gv may not really be a PVGV, cv may not really be
- a PVCV, (because of the space optimisations that gv_init
- understands) But they're true if for this symbol there is
- respectively a typeglob and a subroutine.
- */
- cv = gv ? ((SvTYPE(gv) == SVt_PVGV)
- /* Real typeglob, so get the real subroutine: */
- ? GvCVu(gv)
- /* A proxy for a subroutine in this package? */
- : SvOK(gv) ? (CV *) gv : NULL)
- : NULL;
+ cv = NULL;
+ {
+ OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc(sv));
+ const_op->op_private = OPpCONST_BARE;
+ rv2cv_op = newCVREF(0, const_op);
+ }
+ if (rv2cv_op->op_type == OP_RV2CV &&
+ (rv2cv_op->op_flags & OPf_KIDS)) {
+ OP *rv_op = cUNOPx(rv2cv_op)->op_first;
+ switch (rv_op->op_type) {
+ case OP_CONST: {
+ SV *sv = cSVOPx_sv(rv_op);
+ if (SvROK(sv) && SvTYPE(SvRV(sv)) == SVt_PVCV)
+ cv = (CV*)SvRV(sv);
+ } break;
+ case OP_GV: {
+ GV *gv = cGVOPx_gv(rv_op);
+ CV *maybe_cv = GvCVu(gv);
+ if (maybe_cv && SvTYPE((SV*)maybe_cv) == SVt_PVCV)
+ cv = maybe_cv;
+ } break;
+ }
+ }
/* See if it's the indirect object for a list operator. */
/* Two barewords in a row may indicate method call. */
if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
- (tmp = intuit_method(s, gv, cv)))
+ (tmp = intuit_method(s, gv, cv))) {
+ op_free(rv2cv_op);
return REPORT(tmp);
+ }
/* If not a declared subroutine, it's an indirect object. */
/* (But it's an indir obj regardless for sort.) */
if (
( !immediate_paren && (PL_last_lop_op == OP_SORT ||
- ((!gv || !cv) &&
+ (!cv &&
(PL_last_lop_op != OP_MAPSTART &&
PL_last_lop_op != OP_GREPSTART))))
|| (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
/* Is this a word before a => operator? */
if (*s == '=' && s[1] == '>' && !pkgname) {
+ op_free(rv2cv_op);
CLINE;
- sv_setpv(((SVOP*)yylval.opval)->op_sv, PL_tokenbuf);
+ sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
- SvUTF8_on(((SVOP*)yylval.opval)->op_sv);
+ SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
TERM(WORD);
}
d = s + 1;
while (SPACE_OR_TAB(*d))
d++;
- if (*d == ')' && (sv = gv_const_sv(gv))) {
+ if (*d == ')' && (sv = cv_const_sv(cv))) {
s = d + 1;
-#ifdef PERL_MAD
- if (PL_madskills) {
- char *par = SvPVX(PL_linestr) + PL_realtokenstart;
- sv_catpvn(PL_thistoken, par, s - par);
- if (PL_nextwhite) {
- sv_free(PL_nextwhite);
- PL_nextwhite = 0;
- }
- }
-#endif
goto its_constant;
}
}
}
start_force(PL_curforce);
#endif
- NEXTVAL_NEXTTOKE.opval = yylval.opval;
+ NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
PL_expect = XOPERATOR;
#ifdef PERL_MAD
if (PL_madskills) {
PL_thistoken = newSVpvs("");
}
#endif
+ op_free(rv2cv_op);
force_next(WORD);
- yylval.ival = 0;
+ pl_yylval.ival = 0;
TOKEN('&');
}
/* If followed by var or block, call it a method (unless sub) */
- if ((*s == '$' || *s == '{') && (!gv || !cv)) {
+ if ((*s == '$' || *s == '{') && !cv) {
+ op_free(rv2cv_op);
PL_last_lop = PL_oldbufptr;
PL_last_lop_op = OP_METHOD;
PREBLOCK(METHOD);
if (!orig_keyword
&& (isIDFIRST_lazy_if(s,UTF) || *s == '$')
- && (tmp = intuit_method(s, gv, cv)))
+ && (tmp = intuit_method(s, gv, cv))) {
+ op_free(rv2cv_op);
return REPORT(tmp);
+ }
/* Not a method, so call it a subroutine (if defined) */
if (cv) {
- if (lastchar == '-' && ckWARN_d(WARN_AMBIGUOUS))
- Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
- "Ambiguous use of -%s resolved as -&%s()",
- PL_tokenbuf, PL_tokenbuf);
+ if (lastchar == '-')
+ Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
+ "Ambiguous use of -%s resolved as -&%s()",
+ PL_tokenbuf, PL_tokenbuf);
/* Check for a constant sub */
- if ((sv = gv_const_sv(gv))) {
+ if ((sv = cv_const_sv(cv))) {
its_constant:
- SvREFCNT_dec(((SVOP*)yylval.opval)->op_sv);
- ((SVOP*)yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
- yylval.opval->op_private = 0;
+ op_free(rv2cv_op);
+ SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
+ ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
+ pl_yylval.opval->op_private = 0;
TOKEN(WORD);
}
- /* Resolve to GV now. */
- if (SvTYPE(gv) != SVt_PVGV) {
- gv = gv_fetchpv(PL_tokenbuf, 0, SVt_PVCV);
- assert (SvTYPE(gv) == SVt_PVGV);
- /* cv must have been some sort of placeholder, so
- now needs replacing with a real code reference. */
- cv = GvCV(gv);
- }
-
- op_free(yylval.opval);
- yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv));
- yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
+ op_free(pl_yylval.opval);
+ pl_yylval.opval = rv2cv_op;
+ pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
PL_last_lop = PL_oldbufptr;
PL_last_lop_op = OP_ENTERSUB;
/* Is there a prototype? */
SvPOK(cv))
{
STRLEN protolen;
- const char *proto = SvPV_const((SV*)cv, protolen);
+ const char *proto = SvPV_const(MUTABLE_SV(cv), protolen);
if (!protolen)
TERM(FUNC0SUB);
if ((*proto == '$' || *proto == '_') && proto[1] == '\0')
while (*proto == ';')
proto++;
if (*proto == '&' && *s == '{') {
- sv_setpv(PL_subname,
- (const char *)
- (PL_curstash ?
- "__ANON__" : "__ANON__::__ANON__"));
+ if (PL_curstash)
+ sv_setpvs(PL_subname, "__ANON__");
+ else
+ sv_setpvs(PL_subname, "__ANON__::__ANON__");
PREBLOCK(LSTOPSUB);
}
}
PL_thiswhite = 0;
}
start_force(PL_curforce);
- NEXTVAL_NEXTTOKE.opval = yylval.opval;
+ NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
PL_expect = XTERM;
if (PL_madskills) {
PL_nextwhite = nextPL_nextwhite;
}
if (probable_sub) {
gv = gv_fetchpv(PL_tokenbuf, GV_ADD, SVt_PVCV);
- op_free(yylval.opval);
- yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv));
- yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
+ op_free(pl_yylval.opval);
+ pl_yylval.opval = rv2cv_op;
+ pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
PL_last_lop = PL_oldbufptr;
PL_last_lop_op = OP_ENTERSUB;
PL_nextwhite = PL_thiswhite;
PL_thiswhite = 0;
start_force(PL_curforce);
- NEXTVAL_NEXTTOKE.opval = yylval.opval;
+ NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
PL_expect = XTERM;
PL_nextwhite = nextPL_nextwhite;
curmad('X', PL_thistoken);
TOKEN(NOAMP);
}
#else
- NEXTVAL_NEXTTOKE.opval = yylval.opval;
+ NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
PL_expect = XTERM;
force_next(WORD);
TOKEN(NOAMP);
/* Call it a bare word */
if (PL_hints & HINT_STRICT_SUBS)
- yylval.opval->op_private |= OPpCONST_STRICT;
+ pl_yylval.opval->op_private |= OPpCONST_STRICT;
else {
bareword:
+ /* after "print" and similar functions (corresponding to
+ * "F? L" in opcode.pl), whatever wasn't already parsed as
+ * a filehandle should be subject to "strict subs".
+ * Likewise for the optional indirect-object argument to system
+ * or exec, which can't be a bareword */
+ if ((PL_last_lop_op == OP_PRINT
+ || PL_last_lop_op == OP_PRTF
+ || PL_last_lop_op == OP_SAY
+ || PL_last_lop_op == OP_SYSTEM
+ || PL_last_lop_op == OP_EXEC)
+ && (PL_hints & HINT_STRICT_SUBS))
+ pl_yylval.opval->op_private |= OPpCONST_STRICT;
if (lastchar != '-') {
if (ckWARN(WARN_RESERVED)) {
d = PL_tokenbuf;
}
}
}
+ op_free(rv2cv_op);
safe_bareword:
- if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
- && ckWARN_d(WARN_AMBIGUOUS)) {
- Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
- "Operator or semicolon missing before %c%s",
- lastchar, PL_tokenbuf);
- Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
- "Ambiguous use of %c resolved as operator %c",
- lastchar, lastchar);
+ if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
+ Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
+ "Operator or semicolon missing before %c%s",
+ lastchar, PL_tokenbuf);
+ Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
+ "Ambiguous use of %c resolved as operator %c",
+ lastchar, lastchar);
}
TOKEN(WORD);
}
case KEY___FILE__:
- yylval.opval = (OP*)newSVOP(OP_CONST, 0,
+ pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
newSVpv(CopFILE(PL_curcop),0));
TERM(THING);
case KEY___LINE__:
- yylval.opval = (OP*)newSVOP(OP_CONST, 0,
+ pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
TERM(THING);
case KEY___PACKAGE__:
- yylval.opval = (OP*)newSVOP(OP_CONST, 0,
+ pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
(PL_curstash
? newSVhek(HvNAME_HEK(PL_curstash))
: &PL_sv_undef));
#endif
/* Mark this internal pseudo-handle as clean */
IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
- if (PL_preprocess)
- IoTYPE(GvIOp(gv)) = IoTYPE_PIPE;
- else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
+ if ((PerlIO*)PL_rsfp == PerlIO_stdin())
IoTYPE(GvIOp(gv)) = IoTYPE_STD;
else
IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
PL_realtokenstart = -1;
}
- while ((s = filter_gets(PL_endwhite, PL_rsfp,
- SvCUR(PL_endwhite))) != Nullch) ;
+ while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
+ != NULL) ;
}
#endif
PL_rsfp = NULL;
s = force_word(s,WORD,TRUE,TRUE,FALSE);
if (orig_keyword == KEY_do) {
orig_keyword = 0;
- yylval.ival = 1;
+ pl_yylval.ival = 1;
}
else
- yylval.ival = 0;
+ pl_yylval.ival = 0;
OPERATOR(DO);
case KEY_die:
PREBLOCK(ELSE);
case KEY_elsif:
- yylval.ival = CopLINE(PL_curcop);
+ pl_yylval.ival = CopLINE(PL_curcop);
OPERATOR(ELSIF);
case KEY_eq:
case KEY_eval:
s = SKIPSPACE1(s);
- PL_expect = (*s == '{') ? XTERMBLOCK : XTERM;
- UNIBRACK(OP_ENTEREVAL);
+ if (*s == '{') { /* block eval */
+ PL_expect = XTERMBLOCK;
+ UNIBRACK(OP_ENTERTRY);
+ }
+ else { /* string eval */
+ PL_expect = XTERM;
+ UNIBRACK(OP_ENTEREVAL);
+ }
case KEY_eof:
UNI(OP_EOF);
- case KEY_err:
- OPERATOR(DOROP);
-
case KEY_exp:
UNI(OP_EXP);
UNI(OP_EACH);
case KEY_exec:
- set_csh();
LOP(OP_EXEC,XREF);
case KEY_endhostent:
case KEY_for:
case KEY_foreach:
- yylval.ival = CopLINE(PL_curcop);
+ pl_yylval.ival = CopLINE(PL_curcop);
s = SKIPSPACE1(s);
if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
char *p = s;
FUN0(OP_GETLOGIN);
case KEY_given:
- yylval.ival = CopLINE(PL_curcop);
+ pl_yylval.ival = CopLINE(PL_curcop);
OPERATOR(GIVEN);
case KEY_glob:
- set_csh();
LOP(OP_GLOB,XTERM);
case KEY_hex:
UNI(OP_HEX);
case KEY_if:
- yylval.ival = CopLINE(PL_curcop);
+ pl_yylval.ival = CopLINE(PL_curcop);
OPERATOR(IF);
case KEY_index:
UNI(OP_LCFIRST);
case KEY_local:
- yylval.ival = 0;
+ pl_yylval.ival = 0;
OPERATOR(LOCAL);
case KEY_length:
case KEY_our:
case KEY_my:
case KEY_state:
- PL_in_my = tmp;
+ PL_in_my = (U16)tmp;
s = SKIPSPACE1(s);
if (isIDFIRST_lazy_if(s,UTF)) {
#ifdef PERL_MAD
}
#endif
}
- yylval.ival = 1;
+ pl_yylval.ival = 1;
OPERATOR(MY);
case KEY_next:
LOP(OP_OPEN,XTERM);
case KEY_or:
- yylval.ival = OP_OR;
+ pl_yylval.ival = OP_OR;
OPERATOR(OROP);
case KEY_ord:
case KEY_package:
s = force_word(s,WORD,FALSE,TRUE,FALSE);
+ s = SKIPSPACE1(s);
+ s = force_strict_version(s);
OPERATOR(PACKAGE);
case KEY_pipe:
s = scan_str(s,!!PL_madskills,FALSE);
if (!s)
missingterm(NULL);
- yylval.ival = OP_CONST;
+ pl_yylval.ival = OP_CONST;
TERM(sublex_start());
case KEY_quotemeta:
for (; !isSPACE(*d) && len; --len, ++d)
/**/;
}
- sv = newSVpvn(b, d-b);
- if (DO_UTF8(PL_lex_stuff))
- SvUTF8_on(sv);
+ sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
words = append_elem(OP_LIST, words,
newSVOP(OP_CONST, 0, tokeq(sv)));
}
s = scan_str(s,!!PL_madskills,FALSE);
if (!s)
missingterm(NULL);
- yylval.ival = OP_STRINGIFY;
+ pl_yylval.ival = OP_STRINGIFY;
if (SvIVX(PL_lex_stuff) == '\'')
SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should intepolate */
TERM(sublex_start());
}
if (orig_keyword == KEY_require) {
orig_keyword = 0;
- yylval.ival = 1;
+ pl_yylval.ival = 1;
}
else
- yylval.ival = 0;
+ pl_yylval.ival = 0;
PL_expect = XTERM;
PL_bufptr = s;
PL_last_uni = PL_oldbufptr;
UNI(OP_READDIR);
case KEY_readline:
- set_csh();
UNIDOR(OP_READLINE);
case KEY_readpipe:
- set_csh();
- UNI(OP_BACKTICK);
+ UNIDOR(OP_BACKTICK);
case KEY_rewinddir:
UNI(OP_REWINDDIR);
case KEY_s:
s = scan_subst(s);
- if (yylval.opval)
+ if (pl_yylval.opval)
TERM(sublex_start());
else
TOKEN(1); /* force error */
(*s == ':' && s[1] == ':'))
{
#ifdef PERL_MAD
- SV *nametoke;
+ SV *nametoke = NULL;
#endif
PL_expect = XBLOCK;
Perl_croak(aTHX_ "Missing name in \"my sub\"");
PL_expect = XTERMBLOCK;
attrful = XATTRTERM;
- sv_setpvn(PL_subname,"?",1);
+ sv_setpvs(PL_subname,"?");
have_name = FALSE;
}
if (*s == '(') {
char *p;
bool bad_proto = FALSE;
- const bool warnsyntax = ckWARN(WARN_SYNTAX);
+ bool in_brackets = FALSE;
+ char greedy_proto = ' ';
+ bool proto_after_greedy_proto = FALSE;
+ bool must_be_last = FALSE;
+ bool underscore = FALSE;
+ bool seen_underscore = FALSE;
+ const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
s = scan_str(s,!!PL_madskills,FALSE);
if (!s)
for (p = d; *p; ++p) {
if (!isSPACE(*p)) {
d[tmp++] = *p;
- if (warnsyntax && !strchr("$@%*;[]&\\_", *p))
- bad_proto = TRUE;
+
+ if (warnillegalproto) {
+ if (must_be_last)
+ proto_after_greedy_proto = TRUE;
+ if (!strchr("$@%*;[]&\\_", *p)) {
+ bad_proto = TRUE;
+ }
+ else {
+ if ( underscore ) {
+ if ( *p != ';' )
+ bad_proto = TRUE;
+ underscore = FALSE;
+ }
+ if ( *p == '[' ) {
+ in_brackets = TRUE;
+ }
+ else if ( *p == ']' ) {
+ in_brackets = FALSE;
+ }
+ else if ( (*p == '@' || *p == '%') &&
+ ( tmp < 2 || d[tmp-2] != '\\' ) &&
+ !in_brackets ) {
+ must_be_last = TRUE;
+ greedy_proto = *p;
+ }
+ else if ( *p == '_' ) {
+ underscore = seen_underscore = TRUE;
+ }
+ }
+ }
}
}
d[tmp] = '\0';
+ if (proto_after_greedy_proto)
+ Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
+ "Prototype after '%c' for %"SVf" : %s",
+ greedy_proto, SVfARG(PL_subname), d);
if (bad_proto)
- Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
- "Illegal character in prototype for %"SVf" : %s",
+ Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
+ "Illegal character %sin prototype for %"SVf" : %s",
+ seen_underscore ? "after '_' " : "",
SVfARG(PL_subname), d);
SvCUR_set(PL_lex_stuff, tmp);
have_proto = TRUE;
CURMAD('Q', PL_thisclose);
NEXTVAL_NEXTTOKE.opval =
(OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
- PL_lex_stuff = Nullsv;
+ PL_lex_stuff = NULL;
force_next(THING);
s = SKIPSPACE2(s,tmpwhite);
else if (*s != '{' && key == KEY_sub) {
if (!have_name)
Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
- else if (*s != ';')
+ else if (*s != ';' && *s != '}')
Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
}
}
#endif
if (!have_name) {
- sv_setpv(PL_subname,
- (const char *)
- (PL_curstash ? "__ANON__" : "__ANON__::__ANON__"));
+ if (PL_curstash)
+ sv_setpvs(PL_subname, "__ANON__");
+ else
+ sv_setpvs(PL_subname, "__ANON__::__ANON__");
TOKEN(ANONSUB);
}
#ifndef PERL_MAD
}
case KEY_system:
- set_csh();
LOP(OP_SYSTEM,XREF);
case KEY_symlink:
UNI(OP_UNTIE);
case KEY_until:
- yylval.ival = CopLINE(PL_curcop);
+ pl_yylval.ival = CopLINE(PL_curcop);
OPERATOR(UNTIL);
case KEY_unless:
- yylval.ival = CopLINE(PL_curcop);
+ pl_yylval.ival = CopLINE(PL_curcop);
OPERATOR(UNLESS);
case KEY_unlink:
LOP(OP_VEC,XTERM);
case KEY_when:
- yylval.ival = CopLINE(PL_curcop);
+ pl_yylval.ival = CopLINE(PL_curcop);
OPERATOR(WHEN);
case KEY_while:
- yylval.ival = CopLINE(PL_curcop);
+ pl_yylval.ival = CopLINE(PL_curcop);
OPERATOR(WHILE);
case KEY_warn:
goto just_a_word;
case KEY_xor:
- yylval.ival = OP_XOR;
+ pl_yylval.ival = OP_XOR;
OPERATOR(OROP);
case KEY_y:
PADOFFSET tmp = 0;
/* pit holds the identifier we read and pending_ident is reset */
char pit = PL_pending_ident;
+ const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
+ /* All routes through this function want to know if there is a colon. */
+ const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
PL_pending_ident = 0;
/* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
*/
if (PL_in_my) {
if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
- if (strchr(PL_tokenbuf,':'))
+ if (has_colon)
yyerror(Perl_form(aTHX_ "No package name allowed for "
"variable %s in \"our\"",
PL_tokenbuf));
- tmp = allocmy(PL_tokenbuf);
+ tmp = allocmy(PL_tokenbuf, tokenbuf_len, 0);
}
else {
- if (strchr(PL_tokenbuf,':'))
+ if (has_colon)
yyerror(Perl_form(aTHX_ PL_no_myglob,
PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf));
- yylval.opval = newOP(OP_PADANY, 0);
- yylval.opval->op_targ = allocmy(PL_tokenbuf);
+ pl_yylval.opval = newOP(OP_PADANY, 0);
+ pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len, 0);
return PRIVATEREF;
}
}
(although why you'd do that is anyone's guess).
*/
- if (!strchr(PL_tokenbuf,':')) {
+ if (!has_colon) {
if (!PL_in_my)
- tmp = pad_findmy(PL_tokenbuf);
+ tmp = pad_findmy(PL_tokenbuf, tokenbuf_len, 0);
if (tmp != NOT_IN_PAD) {
/* might be an "our" variable" */
if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
HEK * const stashname = HvNAME_HEK(stash);
SV * const sym = newSVhek(stashname);
sv_catpvs(sym, "::");
- sv_catpv(sym, PL_tokenbuf+1);
- yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
- yylval.opval->op_private = OPpCONST_ENTERED;
+ sv_catpvn(sym, PL_tokenbuf+1, tokenbuf_len - 1);
+ pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
+ pl_yylval.opval->op_private = OPpCONST_ENTERED;
gv_fetchsv(sym,
(PL_in_eval
? (GV_ADDMULTI | GV_ADDINEVAL)
}
}
- yylval.opval = newOP(OP_PADANY, 0);
- yylval.opval->op_targ = tmp;
+ pl_yylval.opval = newOP(OP_PADANY, 0);
+ pl_yylval.opval->op_targ = tmp;
return PRIVATEREF;
}
}
and @foo isn't a variable we can find in the symbol
table.
*/
- if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
- GV *gv = gv_fetchpv(PL_tokenbuf+1, 0, SVt_PVAV);
+ if (ckWARN(WARN_AMBIGUOUS) &&
+ pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
+ GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1, 0,
+ SVt_PVAV);
if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
- && ckWARN(WARN_AMBIGUOUS)
/* DO NOT warn for @- and @+ */
&& !( PL_tokenbuf[2] == '\0' &&
( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
{
/* Downgraded from fatal to warning 20000522 mjd */
Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
- "Possible unintended interpolation of %s in string",
- PL_tokenbuf);
+ "Possible unintended interpolation of %s in string",
+ PL_tokenbuf);
}
}
/* build ops for a bareword */
- yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0));
- yylval.opval->op_private = OPpCONST_ENTERED;
- gv_fetchpv(
- PL_tokenbuf+1,
+ pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn(PL_tokenbuf + 1,
+ tokenbuf_len - 1));
+ pl_yylval.opval->op_private = OPpCONST_ENTERED;
+ gv_fetchpvn_flags(
+ PL_tokenbuf + 1, tokenbuf_len - 1,
/* If the identifier refers to a stash, don't autovivify it.
* Change 24660 had the side effect of causing symbol table
* hashes to always be defined, even if they were freshly
* tests still give the expected answers, even though what
* they're actually testing has now changed subtly.
*/
- (*PL_tokenbuf == '%' && *(d = PL_tokenbuf + strlen(PL_tokenbuf) - 1) == ':' && d[-1] == ':'
+ (*PL_tokenbuf == '%'
+ && *(d = PL_tokenbuf + tokenbuf_len - 1) == ':'
+ && d[-1] == ':'
? 0
: PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD),
((PL_tokenbuf[0] == '$') ? SVt_PV
Perl_keyword (pTHX_ const char *name, I32 len, bool all_keywords)
{
dVAR;
+
+ PERL_ARGS_ASSERT_KEYWORD;
+
switch (len)
{
case 1: /* 5 tokens of length 1 */
goto unknown;
- case 'r':
- if (name[2] == 'r')
- { /* err */
- return (all_keywords || FEATURE_IS_ENABLED("err") ? -KEY_err : 0);
- }
-
- goto unknown;
-
case 'x':
if (name[2] == 'p')
{ /* exp */
name[4] == 'i' &&
name[5] == 'f')
{ /* elseif */
- if(ckWARN_d(WARN_SYNTAX))
- Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif");
+ Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif");
}
goto unknown;
{
dVAR;
+ PERL_ARGS_ASSERT_CHECKCOMMA;
+
if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
if (ckWARN(WARN_SYNTAX)) {
int level = 1;
}
while (isSPACE(*w))
++w;
- if (!*w || !strchr(";|})]oaiuw!=", *w)) /* an advisory hack only... */
+ /* the list of chars below is for end of statements or
+ * block / parens, boolean operators (&&, ||, //) and branch
+ * constructs (or, and, if, until, unless, while, err, for).
+ * Not a very solid hack... */
+ if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
"%s (...) interpreted as function",name);
}
and type is used with error messages only. */
STATIC SV *
-S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, SV *sv, SV *pv,
- const char *type)
+S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
+ SV *sv, SV *pv, const char *type, STRLEN typelen)
{
dVAR; dSP;
HV * const table = GvHV(PL_hintgv); /* ^H */
SV *cv, *typesv;
const char *why1 = "", *why2 = "", *why3 = "";
+ PERL_ARGS_ASSERT_NEW_CONSTANT;
+
if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
SV *msg;
SvREFCNT_dec(msg);
return sv;
}
- cvp = hv_fetch(table, key, strlen(key), FALSE);
+
+ /* charnames doesn't work well if there have been errors found */
+ if (PL_error_count > 0 && strEQ(key,"charnames"))
+ return &PL_sv_undef;
+
+ cvp = hv_fetch(table, key, keylen, FALSE);
if (!cvp || !SvOK(*cvp)) {
why1 = "$^H{";
why2 = key;
sv_2mortal(sv); /* Parent created it permanently */
cv = *cvp;
if (!pv && s)
- pv = sv_2mortal(newSVpvn(s, len));
+ pv = newSVpvn_flags(s, len, SVs_TEMP);
if (type && pv)
- typesv = sv_2mortal(newSVpv(type, 0));
+ typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
else
typesv = &PL_sv_undef;
dVAR;
register char *d = dest;
register char * const e = d + destlen - 3; /* two-character token, ending NUL */
+
+ PERL_ARGS_ASSERT_SCAN_WORD;
+
for (;;) {
if (d >= e)
Perl_croak(aTHX_ ident_too_long);
char *bracket = NULL;
char funny = *s++;
register char *d = dest;
- register char * const e = d + destlen + 3; /* two-character token, ending NUL */
+ register char * const e = d + destlen - 3; /* two-character token, ending NUL */
+
+ PERL_ARGS_ASSERT_SCAN_IDENT;
if (isSPACE(*s))
s = PEEKSPACE(s);
return s;
}
-void
-Perl_pmflag(pTHX_ U32* pmfl, int ch)
-{
- PERL_UNUSED_CONTEXT;
- if (ch<256) {
- char c = (char)ch;
- switch (c) {
- CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl);
- case GLOBAL_PAT_MOD: *pmfl |= PMf_GLOBAL; break;
- case CONTINUE_PAT_MOD: *pmfl |= PMf_CONTINUE; break;
- case ONCE_PAT_MOD: *pmfl |= PMf_KEEP; break;
- case KEEPCOPY_PAT_MOD: *pmfl |= PMf_KEEPCOPY; break;
- }
+static U32
+S_pmflag(U32 pmfl, const char ch) {
+ switch (ch) {
+ CASE_STD_PMMOD_FLAGS_PARSE_SET(&pmfl);
+ case GLOBAL_PAT_MOD: pmfl |= PMf_GLOBAL; break;
+ case CONTINUE_PAT_MOD: pmfl |= PMf_CONTINUE; break;
+ case ONCE_PAT_MOD: pmfl |= PMf_KEEP; break;
+ case KEEPCOPY_PAT_MOD: pmfl |= PMf_KEEPCOPY; break;
}
+ return pmfl;
}
STATIC char *
char *modstart;
#endif
+ PERL_ARGS_ASSERT_SCAN_PAT;
if (!s) {
const char * const delimiter = skipspace(start);
}
pm = (PMOP*)newPMOP(type, 0);
- if (PL_multi_open == '?')
+ if (PL_multi_open == '?') {
+ /* This is the only point in the code that sets PMf_ONCE: */
pm->op_pmflags |= PMf_ONCE;
+
+ /* Hence it's safe to do this bit of PMOP book-keeping here, which
+ allows us to restrict the list needed by reset to just the ??
+ matches. */
+ assert(type != OP_TRANS);
+ if (PL_curstash) {
+ MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
+ U32 elements;
+ if (!mg) {
+ mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
+ 0);
+ }
+ elements = mg->mg_len / sizeof(PMOP**);
+ Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
+ ((PMOP**)mg->mg_ptr) [elements++] = pm;
+ mg->mg_len = elements * sizeof(PMOP**);
+ PmopSTASH_set(pm,PL_curstash);
+ }
+ }
#ifdef PERL_MAD
modstart = s;
#endif
while (*s && strchr(valid_flags, *s))
- pmflag(&pm->op_pmflags,*s++);
+ pm->op_pmflags = S_pmflag(pm->op_pmflags, *s++);
#ifdef PERL_MAD
if (PL_madskills && modstart != s) {
SV* tmptoken = newSVpvn(modstart, s - modstart);
}
#endif
/* issue a warning if /c is specified,but /g is not */
- if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL)
- && ckWARN(WARN_REGEXP))
+ if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
{
- Perl_warner(aTHX_ packWARN(WARN_REGEXP),
- "Use of /c modifier is meaningless without /g" );
+ Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
+ "Use of /c modifier is meaningless without /g" );
}
- pm->op_pmpermflags = pm->op_pmflags;
-
PL_lex_op = (OP*)pm;
- yylval.ival = OP_MATCH;
+ pl_yylval.ival = OP_MATCH;
return s;
}
char *modstart;
#endif
- yylval.ival = OP_NULL;
+ PERL_ARGS_ASSERT_SCAN_SUBST;
+
+ pl_yylval.ival = OP_NULL;
s = scan_str(start,!!PL_madskills,FALSE);
es++;
}
else if (strchr(S_PAT_MODS, *s))
- pmflag(&pm->op_pmflags,*s++);
+ pm->op_pmflags = S_pmflag(pm->op_pmflags, *s++);
else
break;
}
PL_thismad = 0;
}
#endif
- if ((pm->op_pmflags & PMf_CONTINUE) && ckWARN(WARN_REGEXP)) {
- Perl_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
+ if ((pm->op_pmflags & PMf_CONTINUE)) {
+ Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
}
if (es) {
PL_sublex_info.super_bufend = PL_bufend;
PL_multi_end = 0;
pm->op_pmflags |= PMf_EVAL;
- while (es-- > 0)
- sv_catpv(repl, (const char *)(es ? "eval " : "do "));
+ while (es-- > 0) {
+ if (es)
+ sv_catpvs(repl, "eval ");
+ else
+ sv_catpvs(repl, "do ");
+ }
sv_catpvs(repl, "{");
sv_catsv(repl, PL_lex_repl);
if (strchr(SvPVX(PL_lex_repl), '#'))
PL_lex_repl = repl;
}
- pm->op_pmpermflags = pm->op_pmflags;
PL_lex_op = (OP*)pm;
- yylval.ival = OP_SUBST;
+ pl_yylval.ival = OP_SUBST;
return s;
}
register char* s;
OP *o;
short *tbl;
- I32 squash;
- I32 del;
- I32 complement;
+ U8 squash;
+ U8 del;
+ U8 complement;
#ifdef PERL_MAD
char *modstart;
#endif
- yylval.ival = OP_NULL;
+ PERL_ARGS_ASSERT_SCAN_TRANS;
+
+ pl_yylval.ival = OP_NULL;
s = scan_str(start,!!PL_madskills,FALSE);
if (!s)
(DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);
PL_lex_op = o;
- yylval.ival = OP_TRANS;
+ pl_yylval.ival = OP_TRANS;
#ifdef PERL_MAD
if (PL_madskills) {
PL_realtokenstart = -1;
#endif
+ PERL_ARGS_ASSERT_SCAN_HEREDOC;
+
s += 2;
d = PL_tokenbuf;
e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
else
term = '"';
if (!isALNUM_lazy_if(s,UTF))
- deprecate_old("bare << to mean <<\"\"");
+ deprecate("bare << to mean <<\"\"");
for (; isALNUM_lazy_if(s,UTF); s++) {
if (d < e)
*d++ = *s;
PL_last_lop = PL_last_uni = NULL;
}
else
- sv_setpvn(tmpstr,"",0); /* avoid "uninitialized" warning */
+ sv_setpvs(tmpstr,""); /* avoid "uninitialized" warning */
while (s >= PL_bufend) { /* multiple line string? */
#ifdef PERL_MAD
if (PL_madskills) {
PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
}
#endif
- if (!outer ||
- !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
+ PL_bufptr = s;
+ CopLINE_inc(PL_curcop);
+ if (!outer || !lex_next_chunk(0)) {
CopLINE_set(PL_curcop, (line_t)PL_multi_start);
missingterm(PL_tokenbuf);
}
+ CopLINE_dec(PL_curcop);
+ s = PL_bufptr;
#ifdef PERL_MAD
stuffstart = s - SvPVX(PL_linestr);
#endif
else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
PL_bufend[-1] = '\n';
#endif
- if (PERLDB_LINE && PL_curstash != PL_debstash)
- update_debugger_info(PL_linestr, NULL, 0);
if (*s == term && memEQ(s,PL_tokenbuf,len)) {
STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
*(SvPVX(PL_linestr) + off ) = ' ';
sv_recode_to_utf8(tmpstr, PL_encoding);
}
PL_lex_stuff = tmpstr;
- yylval.ival = op_type;
+ pl_yylval.ival = op_type;
return s;
}
/* scan_inputsymbol
takes: current position in input buffer
returns: new position in input buffer
- side-effects: yylval and lex_op are set.
+ side-effects: pl_yylval and lex_op are set.
This code handles:
register char *s = start; /* current position in buffer */
char *end;
I32 len;
-
char *d = PL_tokenbuf; /* start of temp holding space */
const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
+ PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
+
end = strchr(s, '\n');
if (!end)
end = PL_bufend;
*/
if (d - PL_tokenbuf != len) {
- yylval.ival = OP_GLOB;
- set_csh();
+ pl_yylval.ival = OP_GLOB;
s = scan_str(start,!!PL_madskills,FALSE);
if (!s)
Perl_croak(aTHX_ "Glob not terminated");
/* try to find it in the pad for this block, otherwise find
add symbol table ops
*/
- const PADOFFSET tmp = pad_findmy(d);
+ const PADOFFSET tmp = pad_findmy(d, len, 0);
if (tmp != NOT_IN_PAD) {
if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
}
if (!readline_overriden)
PL_lex_op->op_flags |= OPf_SPECIAL;
- /* we created the ops in PL_lex_op, so make yylval.ival a null op */
- yylval.ival = OP_NULL;
+ /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
+ pl_yylval.ival = OP_NULL;
}
/* If it's none of the above, it must be a literal filehandle
newGVOP(OP_GV, 0, gv),
newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
: (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
- yylval.ival = OP_NULL;
+ pl_yylval.ival = OP_NULL;
}
}
char *tstart;
#endif
+ PERL_ARGS_ASSERT_SCAN_STR;
+
/* skip space before the delimiter */
if (isSPACE(*s)) {
s = PEEKSPACE(s);
PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
}
#endif
- if (!PL_rsfp ||
- !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
+ CopLINE_inc(PL_curcop);
+ PL_bufptr = PL_bufend;
+ if (!lex_next_chunk(0)) {
sv_free(sv);
CopLINE_set(PL_curcop, (line_t)PL_multi_start);
return NULL;
}
+ s = PL_bufptr;
#ifdef PERL_MAD
stuffstart = 0;
#endif
- /* we read a line, so increment our line counter */
- CopLINE_inc(PL_curcop);
-
- /* update debugger info */
- if (PERLDB_LINE && PL_curstash != PL_debstash)
- update_debugger_info(PL_linestr, NULL, 0);
-
- /* having changed the buffer, we must update PL_bufend */
- PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
- PL_last_lop = PL_last_uni = NULL;
}
/* at this point, we have successfully read the delimited string */
scan_num
takes: pointer to position in buffer
returns: pointer to new position in buffer
- side-effects: builds ops for the constant in yylval.op
+ side-effects: builds ops for the constant in pl_yylval.op
Read a number in any of the formats that Perl accepts:
const char *lastub = NULL; /* position of last underbar */
static char const number_too_long[] = "Number too long";
+ PERL_ARGS_ASSERT_SCAN_NUM;
+
/* We use the first character to decide what type of number this is */
switch (*s) {
}
if (*s == '_') {
- if (ckWARN(WARN_SYNTAX))
- Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
+ Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
"Misplaced _ in number");
lastub = s++;
}
/* _ are ignored -- but warned about if consecutive */
case '_':
- if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX))
- Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
- "Misplaced _ in number");
+ if (lastub && s == lastub + 1)
+ Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
+ "Misplaced _ in number");
lastub = s++;
break;
&& !(PL_hints & HINT_NEW_BINARY)) {
overflowed = TRUE;
n = (NV) u;
- if (ckWARN_d(WARN_OVERFLOW))
- Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
- "Integer overflow in %s number",
- base);
+ Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
+ "Integer overflow in %s number",
+ base);
} else
u = x | b; /* add the digit to the end */
}
/* final misplaced underbar check */
if (s[-1] == '_') {
- if (ckWARN(WARN_SYNTAX))
- Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
+ Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
}
sv = newSV(0);
if (overflowed) {
- if (n > 4294967295.0 && ckWARN(WARN_PORTABLE))
- Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
- "%s number > %s non-portable",
- Base, max);
+ if (n > 4294967295.0)
+ Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
+ "%s number > %s non-portable",
+ Base, max);
sv_setnv(sv, n);
}
else {
#if UVSIZE > 4
- if (u > 0xffffffff && ckWARN(WARN_PORTABLE))
- Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
- "%s number > %s non-portable",
- Base, max);
+ if (u > 0xffffffff)
+ Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
+ "%s number > %s non-portable",
+ Base, max);
#endif
sv_setuv(sv, u);
}
if (just_zero && (PL_hints & HINT_NEW_INTEGER))
sv = new_constant(start, s - start, "integer",
- sv, NULL, NULL);
+ sv, NULL, NULL, 0);
else if (PL_hints & HINT_NEW_BINARY)
- sv = new_constant(start, s - start, "binary", sv, NULL, NULL);
+ sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
}
break;
if -w is on
*/
if (*s == '_') {
- if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX))
- Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
- "Misplaced _ in number");
+ if (lastub && s == lastub + 1)
+ Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
+ "Misplaced _ in number");
lastub = s++;
}
else {
/* final misplaced underbar check */
if (lastub && s == lastub + 1) {
- if (ckWARN(WARN_SYNTAX))
- Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
+ Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
}
/* read a decimal portion if there is one. avoid
*d++ = *s++;
if (*s == '_') {
- if (ckWARN(WARN_SYNTAX))
- Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
- "Misplaced _ in number");
+ Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
+ "Misplaced _ in number");
lastub = s;
}
if (d >= e)
Perl_croak(aTHX_ number_too_long);
if (*s == '_') {
- if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX))
- Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
- "Misplaced _ in number");
+ if (lastub && s == lastub + 1)
+ Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
+ "Misplaced _ in number");
lastub = s;
}
else
}
/* fractional part ending in underbar? */
if (s[-1] == '_') {
- if (ckWARN(WARN_SYNTAX))
- Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
- "Misplaced _ in number");
+ Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
+ "Misplaced _ in number");
}
if (*s == '.' && isDIGIT(s[1])) {
/* oops, it's really a v-string, but without the "v" */
/* stray preinitial _ */
if (*s == '_') {
- if (ckWARN(WARN_SYNTAX))
- Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
- "Misplaced _ in number");
+ Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
+ "Misplaced _ in number");
lastub = s++;
}
/* stray initial _ */
if (*s == '_') {
- if (ckWARN(WARN_SYNTAX))
- Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
- "Misplaced _ in number");
+ Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
+ "Misplaced _ in number");
lastub = s++;
}
}
else {
if (((lastub && s == lastub + 1) ||
- (!isDIGIT(s[1]) && s[1] != '_'))
- && ckWARN(WARN_SYNTAX))
- Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
- "Misplaced _ in number");
+ (!isDIGIT(s[1]) && s[1] != '_')))
+ Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
+ "Misplaced _ in number");
lastub = s++;
}
}
sv_setnv(sv, nv);
}
- if ( floatit ? (PL_hints & HINT_NEW_FLOAT) :
- (PL_hints & HINT_NEW_INTEGER) )
- sv = new_constant(PL_tokenbuf,
- d - PL_tokenbuf,
- (const char *)
- (floatit ? "float" : "integer"),
- sv, NULL, NULL);
+ if ( floatit
+ ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
+ const char *const key = floatit ? "float" : "integer";
+ const STRLEN keylen = floatit ? 5 : 7;
+ sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
+ key, keylen, sv, NULL, NULL, 0);
+ }
break;
/* if it starts with a v, it could be a v-string */
case 'v':
vstring:
sv = newSV(5); /* preallocate storage space */
- s = scan_vstring(s,sv);
+ s = scan_vstring(s, PL_bufend, sv);
break;
}
bool eofmt = FALSE;
#ifdef PERL_MAD
char *tokenstart = s;
- SV* savewhite;
-
+ SV* savewhite = NULL;
+
if (PL_madskills) {
savewhite = PL_thiswhite;
PL_thiswhite = 0;
}
#endif
+ PERL_ARGS_ASSERT_SCAN_FORMLINE;
+
while (!needargs) {
if (*s == '.') {
t = s+1;
}
s = (char*)eol;
if (PL_rsfp) {
+ bool got_some;
#ifdef PERL_MAD
if (PL_madskills) {
if (PL_thistoken)
PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
}
#endif
- s = filter_gets(PL_linestr, PL_rsfp, 0);
+ PL_bufptr = PL_bufend;
+ CopLINE_inc(PL_curcop);
+ got_some = lex_next_chunk(0);
+ CopLINE_dec(PL_curcop);
+ s = PL_bufptr;
#ifdef PERL_MAD
- tokenstart = PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
-#else
- PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
+ tokenstart = PL_bufptr;
#endif
- PL_bufend = PL_bufptr + SvCUR(PL_linestr);
- PL_last_lop = PL_last_uni = NULL;
- if (!s) {
- s = PL_bufptr;
+ if (!got_some)
break;
- }
}
incline(s);
}
return s;
}
-STATIC void
-S_set_csh(pTHX)
-{
-#ifdef CSH
- dVAR;
- if (!PL_cshlen)
- PL_cshlen = strlen(PL_cshname);
-#else
-#if defined(USE_ITHREADS)
- PERL_UNUSED_CONTEXT;
-#endif
-#endif
-}
-
I32
Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
{
save_item(PL_subname);
SAVESPTR(PL_compcv);
- PL_compcv = (CV*)newSV_type(is_format ? SVt_PVFM : SVt_PVCV);
+ PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
CvFLAGS(PL_compcv) |= flags;
PL_subline = CopLINE(PL_curcop);
CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
- CvOUTSIDE(PL_compcv) = (CV*)SvREFCNT_inc_simple(outsidecv);
+ CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
return oldsavestack_ix;
#ifdef __SC__
#pragma segment Perl_yylex
#endif
-int
-Perl_yywarn(pTHX_ const char *s)
+static int
+S_yywarn(pTHX_ const char *const s)
{
dVAR;
+
+ PERL_ARGS_ASSERT_YYWARN;
+
PL_in_eval |= EVAL_WARNONLY;
yyerror(s);
PL_in_eval &= ~EVAL_WARNONLY;
}
int
-Perl_yyerror(pTHX_ const char *s)
+Perl_yyerror(pTHX_ const char *const s)
{
dVAR;
const char *where = NULL;
SV *msg;
int yychar = PL_parser->yychar;
+ PERL_ARGS_ASSERT_YYERROR;
+
if (!yychar || (yychar == ';' && !PL_rsfp))
where = "at EOF";
else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
where = "within string";
}
else {
- SV * const where_sv = sv_2mortal(newSVpvs("next char "));
+ SV * const where_sv = newSVpvs_flags("next char ", SVs_TEMP);
if (yychar < 32)
Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
- else if (isPRINT_LC(yychar))
- Perl_sv_catpvf(aTHX_ where_sv, "%c", yychar);
+ else if (isPRINT_LC(yychar)) {
+ const char string = yychar;
+ sv_catpvn(where_sv, &string, 1);
+ }
else
Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
where = SvPVX_const(where_sv);
(int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
PL_multi_end = 0;
}
- if (PL_in_eval & EVAL_WARNONLY && ckWARN_d(WARN_SYNTAX))
- Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
+ if (PL_in_eval & EVAL_WARNONLY) {
+ Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
+ }
else
qerror(msg);
if (PL_error_count >= 10) {
{
dVAR;
const STRLEN slen = SvCUR(PL_linestr);
+
+ PERL_ARGS_ASSERT_SWALLOW_BOM;
+
switch (s[0]) {
case 0xFF:
if (s[1] == 0xFE) {
- /* UTF-16 little-endian? (or UTF32-LE?) */
+ /* UTF-16 little-endian? (or UTF-32LE?) */
if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
- Perl_croak(aTHX_ "Unsupported script encoding UTF32-LE");
+ Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
#ifndef PERL_NO_UTF16_FILTER
- if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF16-LE script encoding (BOM)\n");
+ if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
s += 2;
- utf16le:
if (PL_bufend > (char*)s) {
- U8 *news;
- I32 newlen;
-
- filter_add(utf16rev_textfilter, NULL);
- Newx(news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
- utf16_to_utf8_reversed(s, news,
- PL_bufend - (char*)s - 1,
- &newlen);
- sv_setpvn(PL_linestr, (const char*)news, newlen);
-#ifdef PERL_MAD
- s = (U8*)SvPVX(PL_linestr);
- Copy(news, s, newlen, U8);
- s[newlen] = '\0';
-#endif
- Safefree(news);
- SvUTF8_on(PL_linestr);
- s = (U8*)SvPVX(PL_linestr);
-#ifdef PERL_MAD
- /* FIXME - is this a general bug fix? */
- s[newlen] = '\0';
-#endif
- PL_bufend = SvPVX(PL_linestr) + newlen;
+ s = add_utf16_textfilter(s, TRUE);
}
#else
- Perl_croak(aTHX_ "Unsupported script encoding UTF16-LE");
+ Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
#endif
}
break;
#ifndef PERL_NO_UTF16_FILTER
if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
s += 2;
- utf16be:
if (PL_bufend > (char *)s) {
- U8 *news;
- I32 newlen;
-
- filter_add(utf16_textfilter, NULL);
- Newx(news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
- utf16_to_utf8(s, news,
- PL_bufend - (char*)s,
- &newlen);
- sv_setpvn(PL_linestr, (const char*)news, newlen);
- Safefree(news);
- SvUTF8_on(PL_linestr);
- s = (U8*)SvPVX(PL_linestr);
- PL_bufend = SvPVX(PL_linestr) + newlen;
+ s = add_utf16_textfilter(s, FALSE);
}
#else
- Perl_croak(aTHX_ "Unsupported script encoding UTF16-BE");
+ Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
#endif
}
break;
if (s[1] == 0) {
if (s[2] == 0xFE && s[3] == 0xFF) {
/* UTF-32 big-endian */
- Perl_croak(aTHX_ "Unsupported script encoding UTF32-BE");
+ Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
}
}
else if (s[2] == 0 && s[3] != 0) {
/* Leading bytes
* 00 xx 00 xx
* are a good indicator of UTF-16BE. */
+#ifndef PERL_NO_UTF16_FILTER
if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
- goto utf16be;
+ s = add_utf16_textfilter(s, FALSE);
+#else
+ Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
+#endif
}
}
#ifdef EBCDIC
/* Leading bytes
* xx 00 xx 00
* are a good indicator of UTF-16LE. */
+#ifndef PERL_NO_UTF16_FILTER
if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
- goto utf16le;
+ s = add_utf16_textfilter(s, TRUE);
+#else
+ Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
+#endif
}
}
return (char*)s;
}
-/*
- * restore_rsfp
- * Restore a source filter.
- */
-
-static void
-restore_rsfp(pTHX_ void *f)
-{
- dVAR;
- PerlIO * const fp = (PerlIO*)f;
-
- if (PL_rsfp == PerlIO_stdin())
- PerlIO_clearerr(PL_rsfp);
- else if (PL_rsfp && (PL_rsfp != fp))
- PerlIO_close(PL_rsfp);
- PL_rsfp = fp;
-}
#ifndef PERL_NO_UTF16_FILTER
static I32
-utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
+S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
{
dVAR;
- const STRLEN old = SvCUR(sv);
- const I32 count = FILTER_READ(idx+1, sv, maxlen);
+ SV *const filter = FILTER_DATA(idx);
+ /* We re-use this each time round, throwing the contents away before we
+ return. */
+ SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
+ SV *const utf8_buffer = filter;
+ IV status = IoPAGE(filter);
+ const bool reverse = cBOOL(IoLINES(filter));
+ I32 retval;
+
+ /* As we're automatically added, at the lowest level, and hence only called
+ from this file, we can be sure that we're not called in block mode. Hence
+ don't bother writing code to deal with block mode. */
+ if (maxlen) {
+ Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
+ }
+ if (status < 0) {
+ Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
+ }
DEBUG_P(PerlIO_printf(Perl_debug_log,
- "utf16_textfilter(%p): %d %d (%d)\n",
- FPTR2DPTR(void *, utf16_textfilter),
- idx, maxlen, (int) count));
- if (count) {
- U8* tmps;
+ "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
+ FPTR2DPTR(void *, S_utf16_textfilter),
+ reverse ? 'l' : 'b', idx, maxlen, status,
+ (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
+
+ while (1) {
+ STRLEN chars;
+ STRLEN have;
I32 newlen;
- Newx(tmps, SvCUR(sv) * 3 / 2 + 1, U8);
- Copy(SvPVX_const(sv), tmps, old, char);
- utf16_to_utf8((U8*)SvPVX_const(sv) + old, tmps + old,
- SvCUR(sv) - old, &newlen);
- sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
- }
- DEBUG_P({sv_dump(sv);});
- return SvCUR(sv);
+ U8 *end;
+ /* First, look in our buffer of existing UTF-8 data: */
+ char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
+
+ if (nl) {
+ ++nl;
+ } else if (status == 0) {
+ /* EOF */
+ IoPAGE(filter) = 0;
+ nl = SvEND(utf8_buffer);
+ }
+ if (nl) {
+ STRLEN got = nl - SvPVX(utf8_buffer);
+ /* Did we have anything to append? */
+ retval = got != 0;
+ sv_catpvn(sv, SvPVX(utf8_buffer), got);
+ /* Everything else in this code works just fine if SVp_POK isn't
+ set. This, however, needs it, and we need it to work, else
+ we loop infinitely because the buffer is never consumed. */
+ sv_chop(utf8_buffer, nl);
+ break;
+ }
+
+ /* OK, not a complete line there, so need to read some more UTF-16.
+ Read an extra octect if the buffer currently has an odd number. */
+ while (1) {
+ if (status <= 0)
+ break;
+ if (SvCUR(utf16_buffer) >= 2) {
+ /* Location of the high octet of the last complete code point.
+ Gosh, UTF-16 is a pain. All the benefits of variable length,
+ *coupled* with all the benefits of partial reads and
+ endianness. */
+ const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
+ + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
+
+ if (*last_hi < 0xd8 || *last_hi > 0xdb) {
+ break;
+ }
+
+ /* We have the first half of a surrogate. Read more. */
+ DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
+ }
+
+ status = FILTER_READ(idx + 1, utf16_buffer,
+ 160 + (SvCUR(utf16_buffer) & 1));
+ DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
+ DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
+ if (status < 0) {
+ /* Error */
+ IoPAGE(filter) = status;
+ return status;
+ }
+ }
+
+ chars = SvCUR(utf16_buffer) >> 1;
+ have = SvCUR(utf8_buffer);
+ SvGROW(utf8_buffer, have + chars * 3 + 1);
+
+ if (reverse) {
+ end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
+ (U8*)SvPVX_const(utf8_buffer) + have,
+ chars * 2, &newlen);
+ } else {
+ end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
+ (U8*)SvPVX_const(utf8_buffer) + have,
+ chars * 2, &newlen);
+ }
+ SvCUR_set(utf8_buffer, have + newlen);
+ *end = '\0';
+
+ /* No need to keep this SV "well-formed" with a '\0' after the end, as
+ it's private to us, and utf16_to_utf8{,reversed} take a
+ (pointer,length) pair, rather than a NUL-terminated string. */
+ if(SvCUR(utf16_buffer) & 1) {
+ *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
+ SvCUR_set(utf16_buffer, 1);
+ } else {
+ SvCUR_set(utf16_buffer, 0);
+ }
+ }
+ DEBUG_P(PerlIO_printf(Perl_debug_log,
+ "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
+ status,
+ (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
+ DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
+ return retval;
}
-static I32
-utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen)
+static U8 *
+S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
{
- dVAR;
- const STRLEN old = SvCUR(sv);
- const I32 count = FILTER_READ(idx+1, sv, maxlen);
- DEBUG_P(PerlIO_printf(Perl_debug_log,
- "utf16rev_textfilter(%p): %d %d (%d)\n",
- FPTR2DPTR(void *, utf16rev_textfilter),
- idx, maxlen, (int) count));
- if (count) {
- U8* tmps;
- I32 newlen;
- Newx(tmps, SvCUR(sv) * 3 / 2 + 1, U8);
- Copy(SvPVX_const(sv), tmps, old, char);
- utf16_to_utf8((U8*)SvPVX_const(sv) + old, tmps + old,
- SvCUR(sv) - old, &newlen);
- sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
+ SV *filter = filter_add(S_utf16_textfilter, NULL);
+
+ IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
+ sv_setpvs(filter, "");
+ IoLINES(filter) = reversed;
+ IoPAGE(filter) = 1; /* Not EOF */
+
+ /* Sadly, we have to return a valid pointer, come what may, so we have to
+ ignore any error return from this. */
+ SvCUR_set(PL_linestr, 0);
+ if (FILTER_READ(0, PL_linestr, 0)) {
+ SvUTF8_on(PL_linestr);
+ } else {
+ SvUTF8_on(PL_linestr);
}
- DEBUG_P({ sv_dump(sv); });
- return count;
+ PL_bufend = SvEND(PL_linestr);
+ return (U8*)SvPVX(PL_linestr);
}
#endif
Function must be called like
sv = newSV(5);
- s = scan_vstring(s,sv);
+ s = scan_vstring(s,e,sv);
+where s and e are the start and end of the string.
The sv should already be large enough to store the vstring
passed in, for performance reasons.
*/
char *
-Perl_scan_vstring(pTHX_ const char *s, SV *sv)
+Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
{
dVAR;
const char *pos = s;
const char *start = s;
+
+ PERL_ARGS_ASSERT_SCAN_VSTRING;
+
if (*pos == 'v') pos++; /* get past 'v' */
- while (pos < PL_bufend && (isDIGIT(*pos) || *pos == '_'))
+ while (pos < e && (isDIGIT(*pos) || *pos == '_'))
pos++;
if ( *pos != '.') {
/* this may not be a v-string if followed by => */
const char *next = pos;
- while (next < PL_bufend && isSPACE(*next))
+ while (next < e && isSPACE(*next))
++next;
- if ((PL_bufend - next) >= 2 && *next == '=' && next[1] == '>' ) {
+ if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
/* return string not v-string */
sv_setpvn(sv,(char *)s,pos-s);
return (char *)pos;
if (*s == 'v')
s++; /* get past 'v' */
- sv_setpvn(sv, "", 0);
+ sv_setpvs(sv, "");
for (;;) {
/* this is atoi() that tolerates underscores */
const UV orev = rev;
rev += (*end - '0') * mult;
mult *= 10;
- if (orev > rev && ckWARN_d(WARN_OVERFLOW))
- Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
- "Integer overflow in decimal number");
+ if (orev > rev)
+ Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
+ "Integer overflow in decimal number");
}
}
#ifdef EBCDIC
sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
SvUTF8_on(sv);
- if (pos + 1 < PL_bufend && *pos == '.' && isDIGIT(pos[1]))
+ if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
s = ++pos;
else {
s = pos;
break;
}
- while (pos < PL_bufend && (isDIGIT(*pos) || *pos == '_'))
+ while (pos < e && (isDIGIT(*pos) || *pos == '_'))
pos++;
}
SvPOK_on(sv);
return (char *)s;
}
+int
+Perl_keyword_plugin_standard(pTHX_
+ char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
+{
+ PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
+ PERL_UNUSED_CONTEXT;
+ PERL_UNUSED_ARG(keyword_ptr);
+ PERL_UNUSED_ARG(keyword_len);
+ PERL_UNUSED_ARG(op_ptr);
+ return KEYWORD_PLUGIN_DECLINE;
+}
+
/*
* Local variables:
* c-indentation-style: bsd