#include "EXTERN.h"
#include "perl.h"
+#define yychar PL_yychar
+#define yylval PL_yylval
+
#ifndef PERL_OBJECT
static void check_uni _((void));
static void force_next _((I32 type));
void
deprecate(char *s)
{
+ dTHR;
if (ckWARN(WARN_DEPRECATED))
warner(WARN_DEPRECATED, "Use of %s is deprecated", s);
}
}
#endif
+#ifndef PERL_OBJECT
+
STATIC I32
utf16_textfilter(int idx, SV *sv, int maxlen)
{
I32 count = FILTER_READ(idx+1, sv, maxlen);
if (count) {
- char* tmps;
- char* tend;
- New(898, tmps, SvCUR(sv) * 3 / 2 + 1, char);
+ U8* tmps;
+ U8* tend;
+ New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
tend = utf16_to_utf8((U16*)SvPVX(sv), tmps, SvCUR(sv));
- sv_usepvn(sv, tmps, tend - tmps);
+ sv_usepvn(sv, (char*)tmps, tend - tmps);
}
return count;
{
I32 count = FILTER_READ(idx+1, sv, maxlen);
if (count) {
- char* tmps;
- char* tend;
- New(898, tmps, SvCUR(sv) * 3 / 2 + 1, char);
+ U8* tmps;
+ U8* tend;
+ New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
tend = utf16_to_utf8_reversed((U16*)SvPVX(sv), tmps, SvCUR(sv));
- sv_usepvn(sv, tmps, tend - tmps);
+ sv_usepvn(sv, (char*)tmps, tend - tmps);
}
return count;
}
+#endif
+
void
lex_start(SV *line)
{
/* expand a range A-Z to the full set of characters. AIE! */
if (dorange) {
I32 i; /* current expanded character */
+ I32 min; /* first character in range */
I32 max; /* last character in range */
i = d - SvPVX(sv); /* remember current offset */
d = SvPVX(sv) + i; /* restore d after the grow potentially has changed the ptr */
d -= 2; /* eat the first char and the - */
- max = (U8)d[1]; /* last char in range */
-
- for (i = (U8)*d; i <= max; i++)
- *d++ = i;
+ min = (U8)*d; /* first char in range */
+ max = (U8)d[1]; /* last char in range */
+
+#ifndef ASCIIish
+ if ((isLOWER(min) && isLOWER(max)) ||
+ (isUPPER(min) && isUPPER(max))) {
+ if (isLOWER(min)) {
+ for (i = min; i <= max; i++)
+ if (isLOWER(i))
+ *d++ = i;
+ } else {
+ for (i = min; i <= max; i++)
+ if (isUPPER(i))
+ *d++ = i;
+ }
+ }
+ else
+#endif
+ for (i = min; i <= max; i++)
+ *d++ = i;
/* mark the range as done, and continue */
dorange = FALSE;
/* if we get here, we're not doing a transliteration */
- /* skip for regexp comments /(?#comment)/ */
+ /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
+ except for the last char, which will be done separately. */
else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
if (s[2] == '#') {
while (s < send && *s != ')')
*d++ = *s++;
- } else if (s[2] == '{') { /* This should march regcomp.c */
+ } else if (s[2] == '{'
+ || s[2] == 'p' && s[3] == '{') { /* This should march regcomp.c */
I32 count = 1;
- char *regparse = s + 3;
+ char *regparse = s + (s[2] == '{' ? 3 : 4);
char c;
while (count && (c = *regparse)) {
count--;
regparse++;
}
- if (*regparse == ')')
- regparse++;
- else
+ if (*regparse != ')') {
+ regparse--; /* Leave one char for continuation. */
yyerror("Sequence (?{...}) not terminated or not {}-balanced");
- while (s < regparse && *s != ')')
+ }
+ while (s < regparse)
*d++ = *s++;
}
}
/* (now in tr/// code again) */
- if (*s & 0x80 && ckWARN(WARN_UTF8) && thisutf) {
- (void)utf8_to_uv(s, &len); /* could cvt latin-1 to utf8 here... */
- if (len) {
- while (len--)
- *d++ = *s++;
- continue;
+ if (*s & 0x80 && thisutf) {
+ dTHR; /* only for ckWARN */
+ if (ckWARN(WARN_UTF8)) {
+ (void)utf8_to_uv((U8*)s, &len); /* could cvt latin-1 to utf8 here... */
+ if (len) {
+ while (len--)
+ *d++ = *s++;
+ continue;
+ }
}
}
if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
{
+ dTHR; /* only for ckWARN */
if (ckWARN(WARN_SYNTAX))
warner(WARN_SYNTAX, "\\%c better written as $%c", *s, *s);
*--s = '$';
if (!e)
yyerror("Missing right brace on \\x{}");
- if (ckWARN(WARN_UTF8) && !utf)
- warner(WARN_UTF8,"Use of \\x{} without utf8 declaration");
+ if (!utf) {
+ dTHR;
+ if (ckWARN(WARN_UTF8))
+ warner(WARN_UTF8,
+ "Use of \\x{} without utf8 declaration");
+ }
/* note: utf always shorter than hex */
- d = uv_to_utf8(d, scan_hex(s + 1, e - s - 1, &len));
+ d = (char*)uv_to_utf8((U8*)d,
+ scan_hex(s + 1, e - s - 1, &len));
s = e + 1;
}
if (utf && PL_lex_inwhat == OP_TRANS &&
utf != (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF))
{
- d = uv_to_utf8(d, uv); /* doing a CU or UC */
+ d = (char*)uv_to_utf8((U8*)d, uv); /* doing a CU or UC */
}
else {
- if (ckWARN(WARN_UTF8) && uv >= 127 && UTF)
- warner(WARN_UTF8,
- "\\x%.*s will produce malformed UTF-8 character; use \\x{%.*s} for that",
- len,s,len,s);
+ if (uv >= 127 && UTF) {
+ dTHR;
+ if (ckWARN(WARN_UTF8))
+ warner(WARN_UTF8,
+ "\\x%.*s will produce malformed UTF-8 character; use \\x{%.*s} for that",
+ len,s,len,s);
+ }
*d++ = (char)uv;
}
s += len;
if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
return;
/* if filter is on top of stack (usual case) just pop it off */
- if (IoDIRP(FILTER_DATA(AvFILLp(PL_rsfp_filters))) == (void*)funcp){
+ if (IoDIRP(FILTER_DATA(AvFILLp(PL_rsfp_filters))) == (DIR*)funcp){
sv_free(av_pop(PL_rsfp_filters));
return;
* routines unnecessarily. You will see this not just here but throughout this file.
*/
if (UTF && (*s & 0xc0) == 0x80) {
- if (isIDFIRST_utf8(s))
+ if (isIDFIRST_utf8((U8*)s))
goto keylookup;
}
croak("Unrecognized character \\x%02X", *s & 255);
}
if (PL_lex_brackets < PL_lex_formbrack) {
char *t;
+#ifdef PERL_STRICT_CR
for (t = s; *t == ' ' || *t == '\t'; t++) ;
+#else
+ for (t = s; *t == ' ' || *t == '\t' || *t == '\r'; t++) ;
+#endif
if (*t == '\n' || *t == '#') {
s--;
PL_expect = XBLOCK;
}
}
- if (s[1] == '#' && (isALPHA(s[2]) || strchr("_{$:", s[2]))) {
+ if (s[1] == '#' && (isALPHA(s[2]) || strchr("_{$:+-", s[2]))) {
if (PL_expect == XOPERATOR)
no_op("Array length", PL_bufptr);
PL_tokenbuf[0] = '@';
OPERATOR(tmp);
case '.':
- if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack && s[1] == '\n' &&
- (s == PL_linestart || s[-1] == '\n') ) {
+ if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
+#ifdef PERL_STRICT_CR
+ && s[1] == '\n'
+#else
+ && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
+#endif
+ && (s == PL_linestart || s[-1] == '\n') )
+ {
PL_lex_formbrack = 0;
PL_expect = XSTATE;
goto rightbracket;
tmp = -tmp;
gv = Nullgv;
gvp = 0;
- if (ckWARN(WARN_AMBIGUOUS) && hgv)
+ if (ckWARN(WARN_AMBIGUOUS) && hgv
+ && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */
warner(WARN_AMBIGUOUS,
"Ambiguous call resolved as CORE::%s(), %s",
GvENAME(hgv), "qualify as such or use &");
{
char *w;
- if (ckWARN(WARN_SYNTAX) && *s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
- int level = 1;
- for (w = s+2; *w && level; w++) {
- if (*w == '(')
- ++level;
- else if (*w == ')')
- --level;
- }
- if (*w)
- for (; *w && isSPACE(*w); w++) ;
- if (!*w || !strchr(";|})]oaiuw!=", *w)) /* an advisory hack only... */
- warner(WARN_SYNTAX, "%s (...) interpreted as function",name);
+ if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
+ dTHR; /* only for ckWARN */
+ if (ckWARN(WARN_SYNTAX)) {
+ int level = 1;
+ for (w = s+2; *w && level; w++) {
+ if (*w == '(')
+ ++level;
+ else if (*w == ')')
+ --level;
+ }
+ if (*w)
+ for (; *w && isSPACE(*w); w++) ;
+ if (!*w || !strchr(";|})]oaiuw!=", *w)) /* an advisory hack only... */
+ warner(WARN_SYNTAX, "%s (...) interpreted as function",name);
+ }
}
while (s < PL_bufend && isSPACE(*s))
s++;
*d++ = *s++;
*d++ = *s++;
}
- else if (UTF && (*s & 0xc0) == 0x80 && isALNUM_utf8(s)) {
+ else if (UTF && (*s & 0xc0) == 0x80 && isALNUM_utf8((U8*)s)) {
char *t = s + UTF8SKIP(s);
- while (*t & 0x80 && is_utf8_mark(t))
+ while (*t & 0x80 && is_utf8_mark((U8*)t))
t += UTF8SKIP(t);
if (d + (t - s) > e)
croak(ident_too_long);
*d++ = *s++;
*d++ = *s++;
}
- else if (UTF && (*s & 0xc0) == 0x80 && isALNUM_utf8(s)) {
+ else if (UTF && (*s & 0xc0) == 0x80 && isALNUM_utf8((U8*)s)) {
char *t = s + UTF8SKIP(s);
- while (*t & 0x80 && is_utf8_mark(t))
+ while (*t & 0x80 && is_utf8_mark((U8*)t))
t += UTF8SKIP(t);
if (d + (t - s) > e)
croak(ident_too_long);
return s;
}
if (*s == '$' && s[1] &&
- (isALNUM(s[1]) || strchr("${", s[1]) || strnEQ(s+1,"::",2)) )
+ (isALNUM(s[1]) || strchr("${", s[1]) || strnEQ(s+1,"::",2)) )
{
- if (isDIGIT(s[1]) && PL_lex_state == LEX_INTERPNORMAL)
- deprecate("\"$$<digit>\" to mean \"${$}<digit>\"");
- else
- return s;
+ return s;
}
if (*s == '{') {
bracket = s;
}
}
}
- if (isIDFIRST(*d) || (UTF && (*d & 0xc0) == 0x80 && isIDFIRST_utf8(d))) {
+ if (isIDFIRST(*d) || (UTF && (*d & 0xc0) == 0x80 && isIDFIRST_utf8((U8*)d))) {
d++;
if (UTF) {
e = s;
while (e < send && (isALNUM(*e) || ((*e & 0xc0) == 0x80 && isALNUM_utf8((U8*)e)) || *e == ':')) {
e += UTF8SKIP(e);
- while (e < send && *e & 0x80 && is_utf8_mark(e))
+ while (e < send && *e & 0x80 && is_utf8_mark((U8*)e))
e += UTF8SKIP(e);
}
Copy(s, d, e - s, char);
*d = '\0';
while (s < send && (*s == ' ' || *s == '\t')) s++;
if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
+ dTHR; /* only for ckWARN */
if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest)) {
char *brack = *s == '[' ? "[...]" : "{...}";
warner(WARN_AMBIGUOUS,
PL_lex_state = LEX_INTERPEND;
if (funny == '#')
funny = '@';
- if (ckWARN(WARN_AMBIGUOUS) && PL_lex_state == LEX_NORMAL &&
- (keyword(dest, d - dest) || perl_get_cv(dest, FALSE)))
- warner(WARN_AMBIGUOUS,
- "Ambiguous use of %c{%s} resolved to %c%s",
- funny, dest, funny, dest);
+ if (PL_lex_state == LEX_NORMAL) {
+ dTHR; /* only for ckWARN */
+ if (ckWARN(WARN_AMBIGUOUS) &&
+ (keyword(dest, d - dest) || perl_get_cv(dest, FALSE)))
+ {
+ warner(WARN_AMBIGUOUS,
+ "Ambiguous use of %c{%s} resolved to %c%s",
+ funny, dest, funny, dest);
+ }
+ }
}
else {
s = bracket; /* let the parser handle it */
if -w is on
*/
if (*s == '_') {
+ dTHR; /* only for ckWARN */
if (ckWARN(WARN_SYNTAX) && lastub && s - lastub != 3)
warner(WARN_SYNTAX, "Misplaced _ in number");
lastub = ++s;
}
/* final misplaced underbar check */
- if (ckWARN(WARN_SYNTAX) && lastub && s - lastub != 3)
- warner(WARN_SYNTAX, "Misplaced _ in number");
+ if (lastub && s - lastub != 3) {
+ dTHR;
+ if (ckWARN(WARN_SYNTAX))
+ warner(WARN_SYNTAX, "Misplaced _ in number");
+ }
/* read a decimal portion if there is one. avoid
3..5 being interpreted as the number 3. followed
while (!needargs) {
if (*s == '.' || *s == '}') {
/*SUPPRESS 530*/
- for (t = s+1; *t == ' ' || *t == '\t'; t++) ;
+#ifdef PERL_STRICT_CR
+ for (t = s+1;*t == ' ' || *t == '\t'; t++) ;
+#else
+ for (t = s+1;*t == ' ' || *t == '\t' || *t == '\r'; t++) ;
+#endif
if (*t == '\n')
break;
}