* "It all comes from here, the stench and the peril." --Frodo
*/
-#define TMP_CRLF_PATCH
-
#include "EXTERN.h"
#include "perl.h"
+#define yychar PL_yychar
+#define yylval PL_yylval
+
#ifndef PERL_OBJECT
static void check_uni _((void));
static void force_next _((I32 type));
static char ident_too_long[] = "Identifier too long";
+#define UTF (PL_hints & HINT_UTF8)
+/*
+ * Note: we try to be careful never to call the isXXX_utf8() functions
+ * unless we're pretty sure we've seen the beginning of a UTF-8 character
+ * (that is, the two high bits are set). Otherwise we risk loading in the
+ * heavy-duty SWASHINIT and SWASHGET routines unnecessarily.
+ */
+#define isIDFIRST_lazy(p) ((!UTF || (*((U8*)p) < 0xc0)) \
+ ? isIDFIRST(*(p)) \
+ : isIDFIRST_utf8((U8*)p))
+#define isALNUM_lazy(p) ((!UTF || (*((U8*)p) < 0xc0)) \
+ ? isALNUM(*(p)) \
+ : isALNUM_utf8((U8*)p))
+
/* The following are arranged oddly so that the guard on the switch statement
* can get by with a single comparison (if the compiler is smart enough).
*/
#undef ff_next
#endif
+#ifdef USE_PURE_BISON
+YYSTYPE* yylval_pointer = NULL;
+int* yychar_pointer = NULL;
+#ifdef EMBED
+#undef yylval
+#undef yychar
+#endif
+#define yylval (*yylval_pointer)
+#define yychar (*yychar_pointer)
+#define YYLEXPARAM yylval_pointer,yychar_pointer
+#else
+#define YYLEXPARAM
+#endif
+
#include "keywords.h"
#ifdef CLINE
yywarn(form("%s found where operator expected", what));
if (is_first)
warn("\t(Missing semicolon on previous line?)\n");
- else if (PL_oldoldbufptr && isIDFIRST(*PL_oldoldbufptr)) {
+ else if (PL_oldoldbufptr && isIDFIRST_lazy(PL_oldoldbufptr)) {
char *t;
- for (t = PL_oldoldbufptr; *t && (isALNUM(*t) || *t == ':'); t++) ;
+ for (t = PL_oldoldbufptr; *t && (isALNUM_lazy(t) || *t == ':'); t++) ;
if (t < PL_bufptr && isSPACE(*t))
warn("\t(Do you need to predeclare %.*s?)\n",
t - PL_oldoldbufptr, PL_oldoldbufptr);
if (nl)
*nl = '\0';
}
- else if (PL_multi_close < 32 || PL_multi_close == 127) {
+ else if (
+#ifdef EBCDIC
+ iscntrl(PL_multi_close)
+#else
+ PL_multi_close < 32 || PL_multi_close == 127
+#endif
+ ) {
*tmpbuf = '^';
tmpbuf[1] = toCTRL(PL_multi_close);
s = "\\n";
void
deprecate(char *s)
{
- if (PL_dowarn)
- warn("Use of %s is deprecated", s);
+ dTHR;
+ if (ckWARN(WARN_DEPRECATED))
+ warner(WARN_DEPRECATED, "Use of %s is deprecated", s);
}
STATIC void
}
#endif
+#ifndef PERL_OBJECT
+
+STATIC I32
+utf16_textfilter(int idx, SV *sv, int maxlen)
+{
+ I32 count = FILTER_READ(idx+1, sv, maxlen);
+ if (count) {
+ U8* tmps;
+ U8* tend;
+ New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
+ tend = utf16_to_utf8((U16*)SvPVX(sv), tmps, SvCUR(sv));
+ sv_usepvn(sv, (char*)tmps, tend - tmps);
+
+ }
+ return count;
+}
+
+STATIC I32
+utf16rev_textfilter(int idx, SV *sv, int maxlen)
+{
+ I32 count = FILTER_READ(idx+1, sv, maxlen);
+ if (count) {
+ U8* tmps;
+ U8* tend;
+ New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
+ tend = utf16_to_utf8_reversed((U16*)SvPVX(sv), tmps, SvCUR(sv));
+ sv_usepvn(sv, (char*)tmps, tend - tmps);
+
+ }
+ return count;
+}
+
+#endif
void
lex_start(SV *line)
return;
while (isSPACE(*PL_last_uni))
PL_last_uni++;
- for (s = PL_last_uni; isALNUM(*s) || *s == '-'; s++) ;
+ for (s = PL_last_uni; isALNUM_lazy(s) || *s == '-'; s++) ;
if ((t = strchr(s, '(')) && t < PL_bufptr)
return;
ch = *s;
start = skipspace(start);
s = start;
- if (isIDFIRST(*s) ||
+ if (isIDFIRST_lazy(s) ||
(allow_pack && *s == ':') ||
(allow_initial_tick && *s == '\'') )
{
if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
PL_lex_state = LEX_INTERPCASEMOD;
- return yylex();
+ return yylex(YYLEXPARAM);
}
/* Is there a right-hand side to take care of? */
register char *d = SvPVX(sv); /* destination for copies */
bool dorange = FALSE; /* are we in a translit range? */
I32 len; /* ? */
+ I32 utf = PL_lex_inwhat == OP_TRANS
+ ? (PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF))
+ : UTF;
+ I32 thisutf = PL_lex_inwhat == OP_TRANS
+ ? (PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF))
+ : UTF;
/* leaveit is the set of acceptably-backslashed characters */
char *leaveit =
PL_lex_inpat
- ? "\\.^$@AGZdDwWsSbB+*?|()-nrtfeaxcz0123456789[{]} \t\n\r\f\v#"
+ ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxcz0123456789[{]} \t\n\r\f\v#"
: "";
while (s < send || dorange) {
/* expand a range A-Z to the full set of characters. AIE! */
if (dorange) {
I32 i; /* current expanded character */
+ I32 min; /* first character in range */
I32 max; /* last character in range */
i = d - SvPVX(sv); /* remember current offset */
d = SvPVX(sv) + i; /* restore d after the grow potentially has changed the ptr */
d -= 2; /* eat the first char and the - */
- max = (U8)d[1]; /* last char in range */
-
- for (i = (U8)*d; i <= max; i++)
- *d++ = i;
+ min = (U8)*d; /* first char in range */
+ max = (U8)d[1]; /* last char in range */
+
+#ifndef ASCIIish
+ if ((isLOWER(min) && isLOWER(max)) ||
+ (isUPPER(min) && isUPPER(max))) {
+ if (isLOWER(min)) {
+ for (i = min; i <= max; i++)
+ if (isLOWER(i))
+ *d++ = i;
+ } else {
+ for (i = min; i <= max; i++)
+ if (isUPPER(i))
+ *d++ = i;
+ }
+ }
+ else
+#endif
+ for (i = min; i <= max; i++)
+ *d++ = i;
/* mark the range as done, and continue */
dorange = FALSE;
/* range begins (ignore - as first or last char) */
else if (*s == '-' && s+1 < send && s != start) {
+ if (utf) {
+ *d++ = (char)0xff; /* use illegal utf8 byte--see pmtrans */
+ s++;
+ continue;
+ }
dorange = TRUE;
s++;
}
/* if we get here, we're not doing a transliteration */
- /* skip for regexp comments /(?#comment)/ */
+ /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
+ except for the last char, which will be done separately. */
else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
if (s[2] == '#') {
while (s < send && *s != ')')
*d++ = *s++;
- } else if (s[2] == '{') { /* This should march regcomp.c */
+ } else if (s[2] == '{'
+ || s[2] == 'p' && s[3] == '{') { /* This should march regcomp.c */
I32 count = 1;
- char *regparse = s + 3;
+ char *regparse = s + (s[2] == '{' ? 3 : 4);
char c;
while (count && (c = *regparse)) {
count--;
regparse++;
}
- if (*regparse == ')')
- regparse++;
- else
+ if (*regparse != ')') {
+ regparse--; /* Leave one char for continuation. */
yyerror("Sequence (?{...}) not terminated or not {}-balanced");
- while (s < regparse && *s != ')')
+ }
+ while (s < regparse)
*d++ = *s++;
}
}
}
/* check for embedded arrays (@foo, @:foo, @'foo, @{foo}, @$foo) */
- else if (*s == '@' && s[1] && (isALNUM(s[1]) || strchr(":'{$", s[1])))
+ else if (*s == '@' && s[1] && (isALNUM_lazy(s+1) || strchr(":'{$", s[1])))
break;
/* check for embedded scalars. only stop if we're sure it's a
break; /* in regexp, $ might be tail anchor */
}
+ /* (now in tr/// code again) */
+
+ if (*s & 0x80 && thisutf) {
+ dTHR; /* only for ckWARN */
+ if (ckWARN(WARN_UTF8)) {
+ (void)utf8_to_uv((U8*)s, &len); /* could cvt latin-1 to utf8 here... */
+ if (len) {
+ while (len--)
+ *d++ = *s++;
+ continue;
+ }
+ }
+ }
+
/* backslashes */
if (*s == '\\' && s+1 < send) {
s++;
if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
{
- if (PL_dowarn)
- warn("\\%c better written as $%c", *s, *s);
+ dTHR; /* only for ckWARN */
+ if (ckWARN(WARN_SYNTAX))
+ warner(WARN_SYNTAX, "\\%c better written as $%c", *s, *s);
*--s = '$';
break;
}
/* \x24 indicates a hex constant */
case 'x':
- *d++ = scan_hex(++s, 2, &len);
- s += len;
+ ++s;
+ if (*s == '{') {
+ char* e = strchr(s, '}');
+
+ if (!e) {
+ yyerror("Missing right brace on \\x{}");
+ e = s;
+ }
+ if (!utf) {
+ dTHR;
+ if (ckWARN(WARN_UTF8))
+ warner(WARN_UTF8,
+ "Use of \\x{} without utf8 declaration");
+ }
+ /* note: utf always shorter than hex */
+ d = (char*)uv_to_utf8((U8*)d,
+ scan_hex(s + 1, e - s - 1, &len));
+ s = e + 1;
+
+ }
+ else {
+ UV uv = (UV)scan_hex(s, 2, &len);
+ if (utf && PL_lex_inwhat == OP_TRANS &&
+ utf != (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF))
+ {
+ d = (char*)uv_to_utf8((U8*)d, uv); /* doing a CU or UC */
+ }
+ else {
+ if (uv >= 127 && UTF) {
+ dTHR;
+ if (ckWARN(WARN_UTF8))
+ warner(WARN_UTF8,
+ "\\x%.*s will produce malformed UTF-8 character; use \\x{%.*s} for that",
+ len,s,len,s);
+ }
+ *d++ = (char)uv;
+ }
+ s += len;
+ }
continue;
/* \c is a control character */
case 'c':
s++;
+#ifdef EBCDIC
+ *d = *s++;
+ if (isLOWER(*d))
+ *d = toUPPER(*d);
+ *d++ = toCTRL(*d);
+#else
len = *s++;
*d++ = toCTRL(len);
+#endif
continue;
/* printf-style backslashes, formfeeds, newlines, etc */
case '&':
case '$':
weight -= seen[un_char] * 10;
- if (isALNUM(s[1])) {
+ if (isALNUM_lazy(s+1)) {
scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
if ((int)strlen(tmpbuf) > 1 && gv_fetchpv(tmpbuf,FALSE, SVt_PV))
weight -= 100;
if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
return;
/* if filter is on top of stack (usual case) just pop it off */
- if (IoDIRP(FILTER_DATA(AvFILLp(PL_rsfp_filters))) == (void*)funcp){
+ if (IoDIRP(FILTER_DATA(AvFILLp(PL_rsfp_filters))) == (DIR*)funcp){
sv_free(av_pop(PL_rsfp_filters));
return;
else
return Nullch ;
}
- else
+ else
return (sv_gets(sv, fp, append));
}
{ "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "TERMBLOCK" };
#endif
-EXT int yychar; /* last token */
-
/*
yylex
if we already built the token before, use it.
*/
-int
-yylex(void)
+int yylex
+#ifdef USE_PURE_BISON
+(YYSTYPE* lvalp, int* lcharp)
+#else
+(void)
+#endif
{
dTHR;
register char *s;
GV *gv = Nullgv;
GV **gvp = 0;
+#ifdef USE_PURE_BISON
+ yylval_pointer = lvalp;
+ yychar_pointer = lcharp;
+#endif
+
/* check if there's an identifier for us to look at */
if (PL_pending_ident) {
/* pit holds the identifier we read and pending_ident is reset */
if (PL_bufptr != PL_bufend)
PL_bufptr += 2;
PL_lex_state = LEX_INTERPCONCAT;
- return yylex();
+ return yylex(YYLEXPARAM);
}
else {
s = PL_bufptr + 1;
Aop(OP_CONCAT);
}
else
- return yylex();
+ return yylex(YYLEXPARAM);
}
case LEX_INTERPPUSH:
s = PL_bufptr;
Aop(OP_CONCAT);
}
- return yylex();
+ return yylex(YYLEXPARAM);
case LEX_INTERPENDMAYBE:
if (intuit_more(PL_bufptr)) {
Aop(OP_CONCAT);
else {
PL_bufptr = s;
- return yylex();
+ return yylex(YYLEXPARAM);
}
}
- return yylex();
+ return yylex(YYLEXPARAM);
case LEX_FORMLINE:
PL_lex_state = LEX_NORMAL;
s = scan_formline(PL_bufptr);
retry:
switch (*s) {
default:
- croak("Unrecognized character \\%03o", *s & 255);
+ if (isIDFIRST_lazy(s))
+ goto keylookup;
+ croak("Unrecognized character \\x%02X", *s & 255);
case 4:
case 26:
goto fake_eof; /* emulate EOF on ^D or ^Z */
else
(void)PerlIO_close(PL_rsfp);
PL_rsfp = Nullfp;
+ PL_doextract = FALSE;
}
if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
sv_setpv(PL_linestr,PL_minus_p ? ";}continue{print" : "");
if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
PL_bufptr = s;
PL_lex_state = LEX_FORMLINE;
- return yylex();
+ return yylex(YYLEXPARAM);
}
goto retry;
case '\r':
-#ifndef TMP_CRLF_PATCH
+#ifdef PERL_STRICT_CR
warn("Illegal character \\%03o (carriage return)", '\r');
croak(
"(Maybe you didn't strip carriage returns after a network transfer?)\n");
if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
PL_bufptr = s;
PL_lex_state = LEX_FORMLINE;
- return yylex();
+ return yylex(YYLEXPARAM);
}
}
else {
else if (*s == '>') {
s++;
s = skipspace(s);
- if (isIDFIRST(*s)) {
+ if (isIDFIRST_lazy(s)) {
s = force_word(s,METHOD,FALSE,TRUE,FALSE);
TOKEN(ARROW);
}
while (d < PL_bufend && (*d == ' ' || *d == '\t'))
d++;
}
- if (d < PL_bufend && isIDFIRST(*d)) {
+ if (d < PL_bufend && isIDFIRST_lazy(d)) {
d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
FALSE, &len);
while (d < PL_bufend && (*d == ' ' || *d == '\t'))
}
t++;
}
- else if (isALPHA(*s)) {
- for (t++; t < PL_bufend && isALNUM(*t); t++) ;
+ else if (isIDFIRST_lazy(s)) {
+ for (t++; t < PL_bufend && isALNUM_lazy(t); t++) ;
}
while (t < PL_bufend && isSPACE(*t))
t++;
|| (*t == '=' && t[1] == '>')))
OPERATOR(HASHBRACK);
if (PL_expect == XREF)
- PL_expect = XTERM;
+ PL_expect = XSTATE; /* was XTERM, trying XSTATE */
else {
PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
PL_expect = XSTATE;
if (PL_lex_fakebrack) {
PL_lex_state = LEX_INTERPEND;
PL_bufptr = s;
- return yylex(); /* ignore fake brackets */
+ return yylex(YYLEXPARAM); /* ignore fake brackets */
}
if (*s == '-' && s[1] == '>')
PL_lex_state = LEX_INTERPENDMAYBE;
if (PL_lex_brackets < PL_lex_fakebrack) {
PL_bufptr = s;
PL_lex_fakebrack = 0;
- return yylex(); /* ignore fake brackets */
+ return yylex(YYLEXPARAM); /* ignore fake brackets */
}
force_next('}');
TOKEN(';');
AOPERATOR(ANDAND);
s--;
if (PL_expect == XOPERATOR) {
- if (PL_dowarn && isALPHA(*s) && PL_bufptr == PL_linestart) {
+ if (ckWARN(WARN_SEMICOLON) && isIDFIRST_lazy(s) && PL_bufptr == PL_linestart) {
PL_curcop->cop_line--;
- warn(warn_nosemi);
+ warner(WARN_SEMICOLON, warn_nosemi);
PL_curcop->cop_line++;
}
BAop(OP_BIT_AND);
OPERATOR(',');
if (tmp == '~')
PMop(OP_MATCH);
- if (PL_dowarn && tmp && isSPACE(*s) && strchr("+-*/%.^&|<",tmp))
- warn("Reversed %c= operator",(int)tmp);
+ if (ckWARN(WARN_SYNTAX) && tmp && isSPACE(*s) && strchr("+-*/%.^&|<",tmp))
+ warner(WARN_SYNTAX, "Reversed %c= operator",(int)tmp);
s--;
if (PL_expect == XSTATE && isALPHA(tmp) &&
(s == PL_linestart+1 || s[-2] == '\n') )
}
if (PL_lex_brackets < PL_lex_formbrack) {
char *t;
+#ifdef PERL_STRICT_CR
for (t = s; *t == ' ' || *t == '\t'; t++) ;
+#else
+ for (t = s; *t == ' ' || *t == '\t' || *t == '\r'; t++) ;
+#endif
if (*t == '\n' || *t == '#') {
s--;
PL_expect = XBLOCK;
}
}
- if (s[1] == '#' && (isALPHA(s[2]) || strchr("_{$:", s[2]))) {
+ if (s[1] == '#' && (isIDFIRST_lazy(s+2) || strchr("{$:+-", s[2]))) {
if (PL_expect == XOPERATOR)
no_op("Array length", PL_bufptr);
PL_tokenbuf[0] = '@';
char *t;
if (*s == '[') {
PL_tokenbuf[0] = '@';
- if (PL_dowarn) {
+ if (ckWARN(WARN_SYNTAX)) {
for(t = s + 1;
- isSPACE(*t) || isALNUM(*t) || *t == '$';
+ isSPACE(*t) || isALNUM_lazy(t) || *t == '$';
t++) ;
if (*t++ == ',') {
PL_bufptr = skipspace(PL_bufptr);
while (t < PL_bufend && *t != ']')
t++;
- warn("Multidimensional syntax %.*s not supported",
- (t - PL_bufptr) + 1, PL_bufptr);
+ warner(WARN_SYNTAX,
+ "Multidimensional syntax %.*s not supported",
+ (t - PL_bufptr) + 1, PL_bufptr);
}
}
}
else if (*s == '{') {
PL_tokenbuf[0] = '%';
- if (PL_dowarn && strEQ(PL_tokenbuf+1, "SIG") &&
+ if (ckWARN(WARN_SYNTAX) && strEQ(PL_tokenbuf+1, "SIG") &&
(t = strchr(s, '}')) && (t = strchr(t, '=')))
{
char tmpbuf[sizeof PL_tokenbuf];
STRLEN len;
for (t++; isSPACE(*t); t++) ;
- if (isIDFIRST(*t)) {
+ if (isIDFIRST_lazy(t)) {
t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE, &len);
- if (*t != '(' && perl_get_cv(tmpbuf, FALSE))
- warn("You need to quote \"%s\"", tmpbuf);
+ for (; isSPACE(*t); t++) ;
+ if (*t == ';' && perl_get_cv(tmpbuf, FALSE))
+ warner(WARN_SYNTAX,
+ "You need to quote \"%s\"", tmpbuf);
}
}
}
PL_expect = XOPERATOR;
else if (strchr("$@\"'`q", *s))
PL_expect = XTERM; /* e.g. print $fh "foo" */
- else if (strchr("&*<%", *s) && isIDFIRST(s[1]))
+ else if (strchr("&*<%", *s) && isIDFIRST_lazy(s+1))
PL_expect = XTERM; /* e.g. print $fh &sub */
- else if (isIDFIRST(*s)) {
+ else if (isIDFIRST_lazy(s)) {
char tmpbuf[sizeof PL_tokenbuf];
scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
if (tmp = keyword(tmpbuf, len)) {
PL_expect = XTERM; /* e.g. print $fh 3 */
else if (*s == '.' && isDIGIT(s[1]))
PL_expect = XTERM; /* e.g. print $fh .3 */
- else if (strchr("/?-+", *s) && !isSPACE(s[1]))
+ else if (strchr("/?-+", *s) && !isSPACE(s[1]) && s[1] != '=')
PL_expect = XTERM; /* e.g. print $fh -1 */
- else if (*s == '<' && s[1] == '<' && !isSPACE(s[2]))
+ else if (*s == '<' && s[1] == '<' && !isSPACE(s[2]) && s[2] != '=')
PL_expect = XTERM; /* print $fh <<"EOF" */
}
PL_pending_ident = '$';
PL_tokenbuf[0] = '%';
/* Warn about @ where they meant $. */
- if (PL_dowarn) {
+ if (ckWARN(WARN_SYNTAX)) {
if (*s == '[' || *s == '{') {
char *t = s + 1;
- while (*t && (isALNUM(*t) || strchr(" \t$#+-'\"", *t)))
+ while (*t && (isALNUM_lazy(t) || strchr(" \t$#+-'\"", *t)))
t++;
if (*t == '}' || *t == ']') {
t++;
PL_bufptr = skipspace(PL_bufptr);
- warn("Scalar value %.*s better written as $%.*s",
+ warner(WARN_SYNTAX,
+ "Scalar value %.*s better written as $%.*s",
t-PL_bufptr, PL_bufptr, t-PL_bufptr-1, PL_bufptr+1);
}
}
/* Disable warning on "study /blah/" */
if (PL_oldoldbufptr == PL_last_uni
&& (*PL_last_uni != 's' || s - PL_last_uni < 5
- || memNE(PL_last_uni, "study", 5) || isALNUM(PL_last_uni[5])))
+ || memNE(PL_last_uni, "study", 5) || isALNUM_lazy(PL_last_uni+5)))
check_uni();
s = scan_pat(s,OP_MATCH);
TERM(sublex_start());
OPERATOR(tmp);
case '.':
- if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack && s[1] == '\n' &&
- (s == PL_linestart || s[-1] == '\n') ) {
+ if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
+#ifdef PERL_STRICT_CR
+ && s[1] == '\n'
+#else
+ && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
+#endif
+ && (s == PL_linestart || s[-1] == '\n') )
+ {
PL_lex_formbrack = 0;
PL_expect = XSTATE;
goto rightbracket;
missingterm((char*)0);
yylval.ival = OP_CONST;
for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
- if (*d == '$' || *d == '@' || *d == '\\') {
+ if (*d == '$' || *d == '@' || *d == '\\' || *d & 0x80) {
yylval.ival = OP_STRINGIFY;
break;
}
case '\\':
s++;
- if (PL_dowarn && PL_lex_inwhat && isDIGIT(*s))
- warn("Can't use \\%c to mean $%c in expression", *s, *s);
+ if (ckWARN(WARN_SYNTAX) && PL_lex_inwhat && isDIGIT(*s))
+ warner(WARN_SYNTAX,"Can't use \\%c to mean $%c in expression",
+ *s, *s);
if (PL_expect == XOPERATOR)
no_op("Backslash",s);
OPERATOR(REFGEN);
tmp = -tmp;
gv = Nullgv;
gvp = 0;
- if (PL_dowarn && hgv)
- warn("Ambiguous call resolved as CORE::%s(), %s"
+ if (ckWARN(WARN_AMBIGUOUS) && hgv
+ && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */
+ warner(WARN_AMBIGUOUS,
+ "Ambiguous call resolved as CORE::%s(), %s",
GvENAME(hgv), "qualify as such or use &");
}
}
if (PL_expect == XOPERATOR) {
if (PL_bufptr == PL_linestart) {
PL_curcop->cop_line--;
- warn(warn_nosemi);
+ warner(WARN_SEMICOLON, warn_nosemi);
PL_curcop->cop_line++;
}
else
if (len > 2 &&
PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
{
- if (PL_dowarn && ! gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVHV))
- warn("Bareword \"%s\" refers to nonexistent package",
+ if (ckWARN(WARN_UNSAFE) && ! gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVHV))
+ warner(WARN_UNSAFE,
+ "Bareword \"%s\" refers to nonexistent package",
PL_tokenbuf);
len -= 2;
PL_tokenbuf[len] = '\0';
/* Two barewords in a row may indicate method call. */
- if ((isALPHA(*s) || *s == '$') && (tmp=intuit_method(s,gv)))
+ if ((isIDFIRST_lazy(s) || *s == '$') && (tmp=intuit_method(s,gv)))
return tmp;
/* If not a declared subroutine, it's an indirect object. */
if (*s == '(') {
CLINE;
if (gv && GvCVu(gv)) {
+ CV *cv;
+ if ((cv = GvCV(gv)) && SvPOK(cv))
+ PL_last_proto = SvPV((SV*)cv, PL_na);
for (d = s + 1; *d == ' ' || *d == '\t'; d++) ;
- if (*d == ')' && (sv = cv_const_sv(GvCV(gv)))) {
+ if (*d == ')' && (sv = cv_const_sv(cv))) {
s = d + 1;
goto its_constant;
}
PL_expect = XOPERATOR;
force_next(WORD);
yylval.ival = 0;
+ PL_last_lop_op = OP_ENTERSUB;
TOKEN('&');
}
/* If followed by a bareword, see if it looks like indir obj. */
- if ((isALPHA(*s) || *s == '$') && (tmp = intuit_method(s,gv)))
+ if ((isIDFIRST_lazy(s) || *s == '$') && (tmp = intuit_method(s,gv)))
return tmp;
/* Not a method, so call it a subroutine (if defined) */
/* Resolve to GV now. */
op_free(yylval.opval);
yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv));
+ PL_last_lop_op = OP_ENTERSUB;
/* Is there a prototype? */
if (SvPOK(cv)) {
STRLEN len;
PL_last_lop_op != OP_TRUNCATE && /* S/F prototype in opcode.pl */
PL_last_lop_op != OP_ACCEPT &&
PL_last_lop_op != OP_PIPE_OP &&
- PL_last_lop_op != OP_SOCKPAIR)
+ PL_last_lop_op != OP_SOCKPAIR &&
+ !(PL_last_lop_op == OP_ENTERSUB
+ && PL_last_proto
+ && PL_last_proto[PL_last_proto[0] == ';' ? 1 : 0] == '*'))
{
warn(
"Bareword \"%s\" not allowed while \"strict subs\" in use",
/* Call it a bare word */
bareword:
- if (PL_dowarn) {
+ if (ckWARN(WARN_RESERVED)) {
if (lastchar != '-') {
for (d = PL_tokenbuf; *d && isLOWER(*d); d++) ;
if (!*d)
- warn(warn_reserved, PL_tokenbuf);
+ warner(WARN_RESERVED, warn_reserved, PL_tokenbuf);
}
}
LOP(OP_CRYPT,XTERM);
case KEY_chmod:
- if (PL_dowarn) {
+ if (ckWARN(WARN_OCTAL)) {
for (d = s; d < PL_bufend && (isSPACE(*d) || *d == '('); d++) ;
if (*d != '0' && isDIGIT(*d))
yywarn("chmod: mode argument is missing initial 0");
case KEY_foreach:
yylval.ival = PL_curcop->cop_line;
s = skipspace(s);
- if (PL_expect == XSTATE && isIDFIRST(*s)) {
+ if (PL_expect == XSTATE && isIDFIRST_lazy(s)) {
char *p = s;
if ((PL_bufend - p) >= 3 &&
strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
p += 2;
p = skipspace(p);
- if (isIDFIRST(*p))
+ if (isIDFIRST_lazy(p))
croak("Missing $ on loop variable");
}
OPERATOR(FOR);
TERM(sublex_start());
case KEY_map:
- LOP(OP_MAPSTART,XREF);
+ LOP(OP_MAPSTART, XREF);
case KEY_mkdir:
LOP(OP_MKDIR,XTERM);
case KEY_my:
PL_in_my = TRUE;
s = skipspace(s);
- if (isIDFIRST(*s)) {
+ if (isIDFIRST_lazy(s)) {
s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
PL_in_my_stash = gv_stashpv(PL_tokenbuf, FALSE);
if (!PL_in_my_stash) {
case KEY_open:
s = skipspace(s);
- if (isIDFIRST(*s)) {
+ if (isIDFIRST_lazy(s)) {
char *t;
- for (d = s; isALNUM(*d); d++) ;
+ for (d = s; isALNUM_lazy(d); d++) ;
t = skipspace(d);
if (strchr("|&*+-=!?:.", *t))
warn("Precedence problem: open %.*s should be open(%.*s)",
s = scan_str(s);
if (!s)
missingterm((char*)0);
- if (PL_dowarn && SvLEN(PL_lex_stuff)) {
+ if (ckWARN(WARN_SYNTAX) && SvLEN(PL_lex_stuff)) {
d = SvPV_force(PL_lex_stuff, len);
for (; len; --len, ++d) {
if (*d == ',') {
- warn("Possible attempt to separate words with commas");
+ warner(WARN_SYNTAX,
+ "Possible attempt to separate words with commas");
break;
}
if (*d == '#') {
- warn("Possible attempt to put comments in qw() list");
+ warner(WARN_SYNTAX,
+ "Possible attempt to put comments in qw() list");
break;
}
}
case KEY_require:
*PL_tokenbuf = '\0';
s = force_word(s,WORD,TRUE,TRUE,FALSE);
- if (isIDFIRST(*PL_tokenbuf))
+ if (isIDFIRST_lazy(PL_tokenbuf))
gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), TRUE);
else if (*s == '<')
yyerror("<> should be quotes");
really_sub:
s = skipspace(s);
- if (isIDFIRST(*s) || *s == '\'' || *s == ':') {
+ if (isIDFIRST_lazy(s) || *s == '\'' || *s == ':') {
char tmpbuf[sizeof PL_tokenbuf];
PL_expect = XBLOCK;
d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
LOP(OP_UTIME,XTERM);
case KEY_umask:
- if (PL_dowarn) {
+ if (ckWARN(WARN_OCTAL)) {
for (d = s; d < PL_bufend && (isSPACE(*d) || *d == '('); d++) ;
if (*d != '0' && isDIGIT(*d))
yywarn("umask: argument is missing initial 0");
FUN0(OP_WANTARRAY);
case KEY_write:
- gv_fetchpv("\f",TRUE, SVt_PV); /* Make sure $^L is defined */
+#ifdef EBCDIC
+ {
+ static char ctl_l[2];
+
+ if (ctl_l[0] == '\0')
+ ctl_l[0] = toCTRL('L');
+ gv_fetchpv(ctl_l,TRUE, SVt_PV);
+ }
+#else
+ gv_fetchpv("\f",TRUE, SVt_PV); /* Make sure $^L is defined */
+#endif
UNI(OP_ENTERWRITE);
case KEY_x:
{
char *w;
- if (PL_dowarn && *s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
- int level = 1;
- for (w = s+2; *w && level; w++) {
- if (*w == '(')
- ++level;
- else if (*w == ')')
- --level;
- }
- if (*w)
- for (; *w && isSPACE(*w); w++) ;
- if (!*w || !strchr(";|})]oaiuw!=", *w)) /* an advisory hack only... */
- warn("%s (...) interpreted as function",name);
+ if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
+ dTHR; /* only for ckWARN */
+ if (ckWARN(WARN_SYNTAX)) {
+ int level = 1;
+ for (w = s+2; *w && level; w++) {
+ if (*w == '(')
+ ++level;
+ else if (*w == ')')
+ --level;
+ }
+ if (*w)
+ for (; *w && isSPACE(*w); w++) ;
+ if (!*w || !strchr(";|})]oaiuw!=", *w)) /* an advisory hack only... */
+ warner(WARN_SYNTAX, "%s (...) interpreted as function",name);
+ }
}
while (s < PL_bufend && isSPACE(*s))
s++;
s++;
while (s < PL_bufend && isSPACE(*s))
s++;
- if (isIDFIRST(*s)) {
+ if (isIDFIRST_lazy(s)) {
w = s++;
- while (isALNUM(*s))
+ while (isALNUM_lazy(s))
s++;
while (s < PL_bufend && isSPACE(*s))
s++;
for (;;) {
if (d >= e)
croak(ident_too_long);
- if (isALNUM(*s))
+ if (isALNUM(*s)) /* UTF handled below */
*d++ = *s++;
- else if (*s == '\'' && allow_package && isIDFIRST(s[1])) {
+ else if (*s == '\'' && allow_package && isIDFIRST_lazy(s+1)) {
*d++ = ':';
*d++ = ':';
s++;
*d++ = *s++;
*d++ = *s++;
}
+ else if (UTF && *(U8*)s >= 0xc0 && isALNUM_utf8((U8*)s)) {
+ char *t = s + UTF8SKIP(s);
+ while (*t & 0x80 && is_utf8_mark((U8*)t))
+ t += UTF8SKIP(t);
+ if (d + (t - s) > e)
+ croak(ident_too_long);
+ Copy(s, d, t - s, char);
+ d += t - s;
+ s = t;
+ }
else {
*d = '\0';
*slp = d - dest;
for (;;) {
if (d >= e)
croak(ident_too_long);
- if (isALNUM(*s))
+ if (isALNUM(*s)) /* UTF handled below */
*d++ = *s++;
- else if (*s == '\'' && isIDFIRST(s[1])) {
+ else if (*s == '\'' && isIDFIRST_lazy(s+1)) {
*d++ = ':';
*d++ = ':';
s++;
*d++ = *s++;
*d++ = *s++;
}
+ else if (UTF && *(U8*)s >= 0xc0 && isALNUM_utf8((U8*)s)) {
+ char *t = s + UTF8SKIP(s);
+ while (*t & 0x80 && is_utf8_mark((U8*)t))
+ t += UTF8SKIP(t);
+ if (d + (t - s) > e)
+ croak(ident_too_long);
+ Copy(s, d, t - s, char);
+ d += t - s;
+ s = t;
+ }
else
break;
}
return s;
}
if (*s == '$' && s[1] &&
- (isALNUM(s[1]) || strchr("${", s[1]) || strnEQ(s+1,"::",2)) )
+ (isALNUM_lazy(s+1) || strchr("${", s[1]) || strnEQ(s+1,"::",2)) )
{
- if (isDIGIT(s[1]) && PL_lex_state == LEX_INTERPNORMAL)
- deprecate("\"$$<digit>\" to mean \"${$}<digit>\"");
- else
- return s;
+ return s;
}
if (*s == '{') {
bracket = s;
}
}
}
- if (isIDFIRST(*d)) {
+ if (isIDFIRST_lazy(d)) {
d++;
- while (isALNUM(*s) || *s == ':')
- *d++ = *s++;
+ if (UTF) {
+ e = s;
+ while (e < send && isALNUM_lazy(e) || *e == ':') {
+ e += UTF8SKIP(e);
+ while (e < send && *e & 0x80 && is_utf8_mark((U8*)e))
+ e += UTF8SKIP(e);
+ }
+ Copy(s, d, e - s, char);
+ d += e - s;
+ s = e;
+ }
+ else {
+ while (isALNUM(*s) || *s == ':')
+ *d++ = *s++;
+ }
*d = '\0';
while (s < send && (*s == ' ' || *s == '\t')) s++;
if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
- if (PL_dowarn && keyword(dest, d - dest)) {
+ dTHR; /* only for ckWARN */
+ if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest)) {
char *brack = *s == '[' ? "[...]" : "{...}";
- warn("Ambiguous use of %c{%s%s} resolved to %c%s%s",
+ warner(WARN_AMBIGUOUS,
+ "Ambiguous use of %c{%s%s} resolved to %c%s%s",
funny, dest, brack, funny, dest, brack);
}
PL_lex_fakebrack = PL_lex_brackets+1;
PL_lex_state = LEX_INTERPEND;
if (funny == '#')
funny = '@';
- if (PL_dowarn && PL_lex_state == LEX_NORMAL &&
- (keyword(dest, d - dest) || perl_get_cv(dest, FALSE)))
- warn("Ambiguous use of %c{%s} resolved to %c%s",
- funny, dest, funny, dest);
+ if (PL_lex_state == LEX_NORMAL) {
+ dTHR; /* only for ckWARN */
+ if (ckWARN(WARN_AMBIGUOUS) &&
+ (keyword(dest, d - dest) || perl_get_cv(dest, FALSE)))
+ {
+ warner(WARN_AMBIGUOUS,
+ "Ambiguous use of %c{%s} resolved to %c%s",
+ funny, dest, funny, dest);
+ }
+ }
}
else {
s = bracket; /* let the parser handle it */
OP *o;
short *tbl;
I32 squash;
- I32 Delete;
+ I32 del;
I32 complement;
+ I32 utf8;
+ I32 count = 0;
yylval.ival = OP_NULL;
croak("Transliteration replacement not terminated");
}
- New(803,tbl,256,short);
- o = newPVOP(OP_TRANS, 0, (char*)tbl);
+ if (UTF) {
+ o = newSVOP(OP_TRANS, 0, 0);
+ utf8 = OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF;
+ }
+ else {
+ New(803,tbl,256,short);
+ o = newPVOP(OP_TRANS, 0, (char*)tbl);
+ utf8 = 0;
+ }
- complement = Delete = squash = 0;
- while (*s == 'c' || *s == 'd' || *s == 's') {
+ complement = del = squash = 0;
+ while (strchr("cdsCU", *s)) {
if (*s == 'c')
complement = OPpTRANS_COMPLEMENT;
else if (*s == 'd')
- Delete = OPpTRANS_DELETE;
- else
+ del = OPpTRANS_DELETE;
+ else if (*s == 's')
squash = OPpTRANS_SQUASH;
+ else {
+ switch (count++) {
+ case 0:
+ if (*s == 'C')
+ utf8 &= ~OPpTRANS_FROM_UTF;
+ else
+ utf8 |= OPpTRANS_FROM_UTF;
+ break;
+ case 1:
+ if (*s == 'C')
+ utf8 &= ~OPpTRANS_TO_UTF;
+ else
+ utf8 |= OPpTRANS_TO_UTF;
+ break;
+ default:
+ croak("Too many /C and /U options");
+ }
+ }
s++;
}
- o->op_private = Delete|squash|complement;
+ o->op_private = del|squash|complement|utf8;
PL_lex_op = o;
yylval.ival = OP_TRANS;
s++, term = '\'';
else
term = '"';
- if (!isALNUM(*s))
+ if (!isALNUM_lazy(s))
deprecate("bare << to mean <<\"\"");
- for (; isALNUM(*s); s++) {
+ for (; isALNUM_lazy(s); s++) {
if (d < e)
*d++ = *s;
}
*d++ = '\n';
*d = '\0';
len = d - PL_tokenbuf;
-#ifdef TMP_CRLF_PATCH
+#ifndef PERL_STRICT_CR
d = strchr(s, '\r');
if (d) {
char *olds = s;
}
PL_curcop->cop_line++;
PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
-#ifdef TMP_CRLF_PATCH
+#ifndef PERL_STRICT_CR
if (PL_bufend - PL_linestart >= 2) {
if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
(PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
if (*d == '$' && d[1]) d++;
/* allow <Pkg'VALUE> or <Pkg::VALUE> */
- while (*d && (isALNUM(*d) || *d == '\'' || *d == ':'))
+ while (*d && (isALNUM_lazy(d) || *d == '\'' || *d == ':'))
d++;
/* If we've tried to read what we allow filehandles to look like, and
if (s < PL_bufend) break; /* handle case where we are done yet :-) */
-#ifdef TMP_CRLF_PATCH
+#ifndef PERL_STRICT_CR
if (to - SvPVX(sv) >= 2) {
if ((to[-2] == '\r' && to[-1] == '\n') ||
(to[-2] == '\n' && to[-1] == '\r'))
}
/* we read a line, so increment our line counter */
PL_curcop->cop_line++;
-
+
/* update debugger info */
if (PERLDB_LINE && PL_curstash != PL_debstash) {
SV *sv = NEWSV(88,0);
av_store(GvAV(PL_curcop->cop_filegv),
(I32)PL_curcop->cop_line, sv);
}
-
+
/* having changed the buffer, we must update PL_bufend */
PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
}
if -w is on
*/
if (*s == '_') {
- if (PL_dowarn && lastub && s - lastub != 3)
- warn("Misplaced _ in number");
+ dTHR; /* only for ckWARN */
+ if (ckWARN(WARN_SYNTAX) && lastub && s - lastub != 3)
+ warner(WARN_SYNTAX, "Misplaced _ in number");
lastub = ++s;
}
else {
}
/* final misplaced underbar check */
- if (PL_dowarn && lastub && s - lastub != 3)
- warn("Misplaced _ in number");
+ if (lastub && s - lastub != 3) {
+ dTHR;
+ if (ckWARN(WARN_SYNTAX))
+ warner(WARN_SYNTAX, "Misplaced _ in number");
+ }
/* read a decimal portion if there is one. avoid
3..5 being interpreted as the number 3. followed
while (!needargs) {
if (*s == '.' || *s == '}') {
/*SUPPRESS 530*/
- for (t = s+1; *t == ' ' || *t == '\t'; t++) ;
+#ifdef PERL_STRICT_CR
+ for (t = s+1;*t == ' ' || *t == '\t'; t++) ;
+#else
+ for (t = s+1;*t == ' ' || *t == '\t' || *t == '\r'; t++) ;
+#endif
if (*t == '\n')
break;
}