/* toke.c
*
- * Copyright (c) 1991-2000, Larry Wall
+ * Copyright (c) 1991-2001, Larry Wall
*
* You may distribute under the terms of either the GNU General Public
* License or the Artistic License, as specified in the README file.
# endif
YYSTYPE* yylval_pointer[YYMAXLEVEL];
int* yychar_pointer[YYMAXLEVEL];
-int yyactlevel = 0;
+int yyactlevel = -1;
# undef yylval
# undef yychar
# define yylval (*yylval_pointer[yyactlevel])
* Also see LOP and lop() below.
*/
-#define TOKEN(retval) return (PL_bufptr = s,(int)retval)
-#define OPERATOR(retval) return (PL_expect = XTERM,PL_bufptr = s,(int)retval)
-#define AOPERATOR(retval) return ao((PL_expect = XTERM,PL_bufptr = s,(int)retval))
-#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s,(int)retval)
-#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s,(int)retval)
-#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s,(int)retval)
-#define TERM(retval) return (CLINE, PL_expect = XOPERATOR,PL_bufptr = s,(int)retval)
-#define LOOPX(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LOOPEX)
-#define FTST(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)UNIOP)
-#define FUN0(f) return(yylval.ival = f,PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC0)
-#define FUN1(f) return(yylval.ival = f,PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC1)
-#define BOop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)BITOROP))
-#define BAop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)BITANDOP))
-#define SHop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)SHIFTOP))
-#define PWop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)POWOP))
-#define PMop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)MATCHOP)
-#define Aop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)ADDOP))
-#define Mop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)MULOP))
-#define Eop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)EQOP)
-#define Rop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)RELOP)
+/* Note that REPORT() and REPORT2() will be expressions that supply
+ * their own trailing comma, not suitable for statements as such. */
+#ifdef DEBUGGING /* Serve -DT. */
+# define REPORT(x,retval) tokereport(x,s,(int)retval),
+# define REPORT2(x,retval) tokereport(x,s, yylval.ival),
+#else
+# define REPORT(x,retval)
+# define REPORT2(x,retval)
+#endif
+
+#define TOKEN(retval) return (REPORT2("token",retval) PL_bufptr = s,(int)retval)
+#define OPERATOR(retval) return (REPORT2("operator",retval) PL_expect = XTERM, PL_bufptr = s,(int)retval)
+#define AOPERATOR(retval) return ao((REPORT2("aop",retval) PL_expect = XTERM, PL_bufptr = s,(int)retval))
+#define PREBLOCK(retval) return (REPORT2("preblock",retval) PL_expect = XBLOCK,PL_bufptr = s,(int)retval)
+#define PRETERMBLOCK(retval) return (REPORT2("pretermblock",retval) PL_expect = XTERMBLOCK,PL_bufptr = s,(int)retval)
+#define PREREF(retval) return (REPORT2("preref",retval) PL_expect = XREF,PL_bufptr = s,(int)retval)
+#define TERM(retval) return (CLINE, REPORT2("term",retval) PL_expect = XOPERATOR, PL_bufptr = s,(int)retval)
+#define LOOPX(f) return(yylval.ival=f, REPORT("loopx",f) PL_expect = XTERM,PL_bufptr = s,(int)LOOPEX)
+#define FTST(f) return(yylval.ival=f, REPORT("ftst",f) PL_expect = XTERM,PL_bufptr = s,(int)UNIOP)
+#define FUN0(f) return(yylval.ival = f, REPORT("fun0",f) PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC0)
+#define FUN1(f) return(yylval.ival = f, REPORT("fun1",f) PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC1)
+#define BOop(f) return ao((yylval.ival=f, REPORT("bitorop",f) PL_expect = XTERM,PL_bufptr = s,(int)BITOROP))
+#define BAop(f) return ao((yylval.ival=f, REPORT("bitandop",f) PL_expect = XTERM,PL_bufptr = s,(int)BITANDOP))
+#define SHop(f) return ao((yylval.ival=f, REPORT("shiftop",f) PL_expect = XTERM,PL_bufptr = s,(int)SHIFTOP))
+#define PWop(f) return ao((yylval.ival=f, REPORT("powop",f) PL_expect = XTERM,PL_bufptr = s,(int)POWOP))
+#define PMop(f) return(yylval.ival=f, REPORT("matchop",f) PL_expect = XTERM,PL_bufptr = s,(int)MATCHOP)
+#define Aop(f) return ao((yylval.ival=f, REPORT("add",f) PL_expect = XTERM,PL_bufptr = s,(int)ADDOP))
+#define Mop(f) return ao((yylval.ival=f, REPORT("mul",f) PL_expect = XTERM,PL_bufptr = s,(int)MULOP))
+#define Eop(f) return(yylval.ival=f, REPORT("eq",f) PL_expect = XTERM,PL_bufptr = s,(int)EQOP)
+#define Rop(f) return(yylval.ival=f, REPORT("rel",f) PL_expect = XTERM,PL_bufptr = s,(int)RELOP)
/* This bit of chicanery makes a unary function followed by
* a parenthesis into a function with one argument, highest precedence.
*/
#define UNI(f) return(yylval.ival = f, \
+ REPORT("uni",f) \
PL_expect = XTERM, \
PL_bufptr = s, \
PL_last_uni = PL_oldbufptr, \
(*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
#define UNIBRACK(f) return(yylval.ival = f, \
+ REPORT("uni",f) \
PL_bufptr = s, \
PL_last_uni = PL_oldbufptr, \
(*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
/* grandfather return to old style */
#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
+STATIC void
+S_tokereport(pTHX_ char *thing, char* s, I32 rv)
+{
+ SV *report;
+ DEBUG_T({
+ report = newSVpv(thing, 0);
+ Perl_sv_catpvf(aTHX_ report, ":line %i:%i:", CopLINE(PL_curcop), rv);
+
+ if (s - PL_bufptr > 0)
+ sv_catpvn(report, PL_bufptr, s - PL_bufptr);
+ else {
+ if (PL_oldbufptr && *PL_oldbufptr)
+ sv_catpv(report, PL_tokenbuf);
+ }
+ PerlIO_printf(Perl_debug_log, "### %s\n", SvPV_nolen(report));
+ })
+}
+
/*
* S_ao
*
SAVEPPTR(PL_bufend);
SAVEPPTR(PL_oldbufptr);
SAVEPPTR(PL_oldoldbufptr);
+ SAVEPPTR(PL_last_lop);
+ SAVEPPTR(PL_last_uni);
SAVEPPTR(PL_linestart);
SAVESPTR(PL_linestr);
SAVEPPTR(PL_lex_brackstack);
SvTEMP_off(PL_linestr);
PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
PL_bufend = PL_bufptr + SvCUR(PL_linestr);
+ PL_last_lop = PL_last_uni = Nullch;
SvREFCNT_dec(PL_rs);
PL_rs = newSVpvn("\n", 1);
PL_rsfp = 0;
s += 4;
else
return;
- if (*s == ' ' || *s == '\t')
+ if (SPACE_OR_TAB(*s))
s++;
else
return;
PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
= SvPVX(PL_linestr);
PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
+ PL_last_lop = PL_last_uni = Nullch;
/* Close the filehandle. Could be from -P preprocessor,
* STDIN, or a regular file. If we were reading code from
{
yylval.ival = f;
CLINE;
+ REPORT("lop", f)
PL_expect = x;
PL_bufptr = s;
PL_last_lop = PL_oldbufptr;
SAVEPPTR(PL_bufptr);
SAVEPPTR(PL_oldbufptr);
SAVEPPTR(PL_oldoldbufptr);
+ SAVEPPTR(PL_last_lop);
+ SAVEPPTR(PL_last_uni);
SAVEPPTR(PL_linestart);
SAVESPTR(PL_linestr);
SAVEPPTR(PL_lex_brackstack);
PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
= SvPVX(PL_linestr);
PL_bufend += SvCUR(PL_linestr);
+ PL_last_lop = PL_last_uni = Nullch;
SAVEFREESV(PL_linestr);
PL_lex_dojoin = FALSE;
S_sublex_done(pTHX)
{
if (!PL_lex_starts++) {
+ SV *sv = newSVpvn("",0);
+ if (SvUTF8(PL_linestr))
+ SvUTF8_on(sv);
PL_expect = XOPERATOR;
- yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn("",0));
+ yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
return THING;
}
PL_lex_inpat = 0;
PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
PL_bufend += SvCUR(PL_linestr);
+ PL_last_lop = PL_last_uni = Nullch;
SAVEFREESV(PL_linestr);
PL_lex_dojoin = FALSE;
PL_lex_brackets = 0;
register char *d = SvPVX(sv); /* destination for copies */
bool dorange = FALSE; /* are we in a translit range? */
bool didrange = FALSE; /* did we just finish a range? */
- bool has_utf8 = FALSE; /* embedded \x{} */
+ bool has_utf8 = (PL_linestr && SvUTF8(PL_linestr));
+ /* the constant is UTF8 */
UV uv;
I32 utf = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
I32 min; /* first character in range */
I32 max; /* last character in range */
+ if (utf) {
+ char *c = (char*)utf8_hop((U8*)d, -1);
+ char *e = d++;
+ while (e-- > c)
+ *(e + 1) = *e;
+ *c = (char)0xff;
+ /* mark the range as done, and continue */
+ dorange = FALSE;
+ didrange = TRUE;
+ continue;
+ }
i = d - SvPVX(sv); /* remember current offset */
SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
d = SvPVX(sv) + i; /* refresh d after realloc */
break; /* in regexp, $ might be tail anchor */
}
- /* (now in tr/// code again) */
-
- if (*s & 0x80 && (this_utf8 || has_utf8)) {
- STRLEN len = (STRLEN) -1;
- UV uv;
- if (this_utf8) {
- uv = utf8_to_uv((U8*)s, send - s, &len, UTF8_CHECK_ONLY);
- }
- if (len == (STRLEN)-1) {
- /* Illegal UTF8 (a high-bit byte), make it valid. */
- char *old_pvx = SvPVX(sv);
- /* need space for one extra char (NOTE: SvCUR() not set here) */
- d = SvGROW(sv, SvLEN(sv) + 1) + (d - old_pvx);
- d = (char*)uv_to_utf8((U8*)d, (U8)*s++);
- }
- else {
- while (len--)
- *d++ = *s++;
- }
- has_utf8 = TRUE;
- continue;
- }
-
/* backslashes */
if (*s == '\\' && s+1 < send) {
- bool to_be_utf8 = FALSE;
-
s++;
/* some backslashes we leave behind */
"Unrecognized escape \\%c passed through",
*s);
/* default action is to copy the quoted character */
- *d++ = *s++;
- continue;
+ goto default_action;
}
/* \132 indicates an octal constant */
else {
STRLEN len = 1; /* allow underscores */
uv = (UV)scan_hex(s + 1, e - s - 1, &len);
- to_be_utf8 = TRUE;
}
s = e + 1;
}
NUM_ESCAPE_INSERT:
/* Insert oct or hex escaped character.
- * There will always enough room in sv since such escapes will
- * be longer than any utf8 sequence they can end up as
- */
+ * There will always enough room in sv since such
+ * escapes will be longer than any UT-F8 sequence
+ * they can end up as. */
+
+ /* This spot is wrong for EBCDIC. Characters like
+ * the lowercase letters and digits are >127 in EBCDIC,
+ * so here they would need to be mapped to the Unicode
+ * repertoire. --jhi */
+
if (uv > 127) {
- if (!has_utf8 && (to_be_utf8 || uv > 255)) {
- /* might need to recode whatever we have accumulated so far
- * if it contains any hibit chars
+ if (!has_utf8 && uv > 255) {
+ /* Might need to recode whatever we have
+ * accumulated so far if it contains any
+ * hibit chars.
+ *
+ * (Can't we keep track of that and avoid
+ * this rescan? --jhi)
*/
int hicount = 0;
char *c;
+
for (c = SvPVX(sv); c < d; c++) {
- if (*c & 0x80)
+ if (UTF8_IS_CONTINUED(*c))
hicount++;
}
if (hicount) {
char *old_pvx = SvPVX(sv);
char *src, *dst;
- d = SvGROW(sv, SvCUR(sv) + hicount + 1) + (d - old_pvx);
+
+ d = SvGROW(sv,
+ SvLEN(sv) + hicount + 1) +
+ (d - old_pvx);
src = d - 1;
d += hicount;
dst = d - 1;
while (src < dst) {
- if (*src & 0x80) {
- dst--;
- uv_to_utf8((U8*)dst, (U8)*src--);
- dst--;
+ if (UTF8_IS_CONTINUED(*src)) {
+ *dst-- = UTF8_EIGHT_BIT_LO(*src);
+ *dst-- = UTF8_EIGHT_BIT_HI(*src--);
}
else {
*dst-- = *src--;
}
}
- if (to_be_utf8 || uv > 255) {
+ if (has_utf8 || uv > 255) {
d = (char*)uv_to_utf8((U8*)d, uv);
has_utf8 = TRUE;
+ if (PL_lex_inwhat == OP_TRANS &&
+ PL_sublex_info.sub_op) {
+ PL_sublex_info.sub_op->op_private |=
+ (PL_lex_repl ? OPpTRANS_FROM_UTF
+ : OPpTRANS_TO_UTF);
+ utf = TRUE;
+ }
}
else {
*d++ = (char)uv;
res = newSVpvn(s + 1, e - s - 1);
res = new_constant( Nullch, 0, "charnames",
res, Nullsv, "\\N{...}" );
+ if (has_utf8)
+ sv_utf8_upgrade(res);
str = SvPV(res,len);
if (!has_utf8 && SvUTF8(res)) {
char *ostart = SvPVX(sv);
if (len > e - s + 4) {
char *odest = SvPVX(sv);
- SvGROW(sv, (SvCUR(sv) + len - (e - s + 4)));
+ SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
d = SvPVX(sv) + (d - odest);
}
Copy(str, d, len, char);
continue;
} /* end if (backslash) */
- *d++ = *s++;
+ default_action:
+ if (UTF8_IS_CONTINUED(*s) && (this_utf8 || has_utf8)) {
+ STRLEN len = (STRLEN) -1;
+ UV uv;
+ if (this_utf8) {
+ uv = utf8_to_uv((U8*)s, send - s, &len, 0);
+ }
+ if (len == (STRLEN)-1) {
+ /* Illegal UTF8 (a high-bit byte), make it valid. */
+ char *old_pvx = SvPVX(sv);
+ /* need space for one extra char (NOTE: SvCUR() not set here) */
+ d = SvGROW(sv, SvLEN(sv) + 1) + (d - old_pvx);
+ d = (char*)uv_to_utf8((U8*)d, (U8)*s++);
+ }
+ else {
+ while (len--)
+ *d++ = *s++;
+ }
+ has_utf8 = TRUE;
+ if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
+ PL_sublex_info.sub_op->op_private |=
+ (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
+ utf = TRUE;
+ }
+ continue;
+ }
+
+ *d++ = *s++;
} /* while loop to process each character */
/* terminate the string and set up the sv */
*/
#ifdef USE_PURE_BISON
-#ifdef __SC__
-#pragma segment Perl_yylex_r
-#endif
int
Perl_yylex_r(pTHX_ YYSTYPE *lvalp, int *lcharp)
{
int r;
+ yyactlevel++;
yylval_pointer[yyactlevel] = lvalp;
yychar_pointer[yyactlevel] = lcharp;
- yyactlevel++;
if (yyactlevel >= YYMAXLEVEL)
Perl_croak(aTHX_ "panic: YYMAXLEVEL");
r = Perl_yylex(aTHX);
- yyactlevel--;
+ if (yyactlevel > 0)
+ yyactlevel--;
return r;
}
#ifdef __SC__
#pragma segment Perl_yylex
#endif
-
int
-#ifdef USE_PURE_BISON
-Perl_yylex(pTHX_ YYSTYPE *lvalp, int *lcharp)
-#else
Perl_yylex(pTHX)
-#endif
{
register char *s;
register char *d;
STRLEN len;
GV *gv = Nullgv;
GV **gvp = 0;
+ bool bof = FALSE;
/* check if there's an identifier for us to look at */
if (PL_pending_ident) {
sv_catpv(PL_linestr, "\n");
PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
+ PL_last_lop = PL_last_uni = Nullch;
if (PERLDB_LINE && PL_curstash != PL_debstash) {
SV *sv = NEWSV(85,0);
goto retry;
}
do {
- bool bof = PL_rsfp ? TRUE : FALSE;
- if (bof) {
+ bof = PL_rsfp ? TRUE : FALSE;
+ if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == Nullch) {
+ fake_eof:
+ if (PL_rsfp) {
+ if (PL_preprocess && !PL_in_eval)
+ (void)PerlProc_pclose(PL_rsfp);
+ else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
+ PerlIO_clearerr(PL_rsfp);
+ else
+ (void)PerlIO_close(PL_rsfp);
+ PL_rsfp = Nullfp;
+ PL_doextract = FALSE;
+ }
+ if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
+ sv_setpv(PL_linestr,PL_minus_p ? ";}continue{print" : "");
+ sv_catpv(PL_linestr,";}");
+ PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
+ PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
+ PL_last_lop = PL_last_uni = Nullch;
+ PL_minus_n = PL_minus_p = 0;
+ goto retry;
+ }
+ PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
+ PL_last_lop = PL_last_uni = Nullch;
+ sv_setpv(PL_linestr,"");
+ TOKEN(';'); /* not infinite loop because rsfp is NULL now */
+ }
+ /* if it looks like the start of a BOM, check if it in fact is */
+ else if (bof && (!*s || *(U8*)s == 0xEF || *(U8*)s >= 0xFE)) {
#ifdef PERLIO_IS_STDIO
# ifdef __GNU_LIBRARY__
# if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
* Workaround? Maybe attach some extra state to PL_rsfp?
*/
if (!PL_preprocess)
- bof = PerlIO_tell(PL_rsfp) == 0;
+ bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
#else
- bof = PerlIO_tell(PL_rsfp) == 0;
+ bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
#endif
- }
- s = filter_gets(PL_linestr, PL_rsfp, 0);
- if (s == Nullch) {
- fake_eof:
- if (PL_rsfp) {
- if (PL_preprocess && !PL_in_eval)
- (void)PerlProc_pclose(PL_rsfp);
- else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
- PerlIO_clearerr(PL_rsfp);
- else
- (void)PerlIO_close(PL_rsfp);
- PL_rsfp = Nullfp;
- PL_doextract = FALSE;
- }
- if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
- sv_setpv(PL_linestr,PL_minus_p ? ";}continue{print" : "");
- sv_catpv(PL_linestr,";}");
- PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
+ if (bof) {
PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
- PL_minus_n = PL_minus_p = 0;
- goto retry;
+ s = swallow_bom((U8*)s);
}
- PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
- sv_setpv(PL_linestr,"");
- TOKEN(';'); /* not infinite loop because rsfp is NULL now */
- } else if (bof) {
- PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
- s = swallow_bom((U8*)s);
}
if (PL_doextract) {
if (*s == '#' && s[1] == '!' && instr(s,"perl"))
sv_setpv(PL_linestr, "");
PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
+ PL_last_lop = PL_last_uni = Nullch;
PL_doextract = FALSE;
}
}
av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
}
PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
+ PL_last_lop = PL_last_uni = Nullch;
if (CopLINE(PL_curcop) == 1) {
while (s < PL_bufend && isSPACE(*s))
s++;
sv_setpv(PL_linestr, "");
PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
+ PL_last_lop = PL_last_uni = Nullch;
PL_preambled = FALSE;
if (PERLDB_LINE)
(void)gv_fetchfile(PL_origfilename);
if (ftst) {
PL_last_lop_op = ftst;
DEBUG_T( { PerlIO_printf(Perl_debug_log,
- "### Saw file test %c\n", ftst);
+ "### Saw file test %c\n", (int)ftst);
} )
FTST(ftst);
}
/* Assume it was a minus followed by a one-letter named
* subroutine call (or a -bareword), then. */
DEBUG_T( { PerlIO_printf(Perl_debug_log,
- "### %c looked like a file test but was not\n", ftst);
+ "### %c looked like a file test but was not\n",
+ (int)ftst);
} )
s -= 2;
}
if (*d == '(') {
d = scan_str(d,TRUE,TRUE);
if (!d) {
- if (PL_lex_stuff) {
- SvREFCNT_dec(PL_lex_stuff);
- PL_lex_stuff = Nullsv;
- }
/* MUST advance bufptr here to avoid bogus
"at end of line" context messages from yyerror().
*/
PL_lex_stuff = Nullsv;
}
else {
- attrs = append_elem(OP_LIST, attrs,
- newSVOP(OP_CONST, 0,
- newSVpvn(s, len)));
+ if (!PL_in_my && len == 6 && strnEQ(s, "lvalue", len))
+ CvLVALUE_on(PL_compcv);
+ else if (!PL_in_my && len == 6 && strnEQ(s, "locked", len))
+ CvLOCKED_on(PL_compcv);
+ else if (!PL_in_my && len == 6 && strnEQ(s, "method", len))
+ CvMETHOD_on(PL_compcv);
+#ifdef USE_ITHREADS
+ else if (PL_in_my == KEY_our && len == 6 && strnEQ(s, "shared", len))
+ GvSHARED_on(cGVOPx_gv(yylval.opval));
+#endif
+ /* After we've set the flags, it could be argued that
+ we don't need to do the attributes.pm-based setting
+ process, and shouldn't bother appending recognized
+ flags. To experiment with that, uncomment the
+ following "else": */
+ else
+ attrs = append_elem(OP_LIST, attrs,
+ newSVOP(OP_CONST, 0,
+ newSVpvn(s, len)));
}
s = skipspace(d);
if (*s == ':' && s[1] != ':')
if (*d == '}') {
char minus = (PL_tokenbuf[0] == '-');
s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
- if (UTF && !IN_BYTE && is_utf8_string((U8*)PL_tokenbuf, 0) &&
- PL_nextval[PL_nexttoke-1].opval)
- SvUTF8_on(((SVOP*)PL_nextval[PL_nexttoke-1].opval)->op_sv);
if (minus)
force_next('-');
}
case '\'':
s = scan_str(s,FALSE,FALSE);
DEBUG_T( { PerlIO_printf(Perl_debug_log,
- "### Saw string in '%s'\n", s);
+ "### Saw string before '%s'\n", s);
} )
if (PL_expect == XOPERATOR) {
if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
case '"':
s = scan_str(s,FALSE,FALSE);
DEBUG_T( { PerlIO_printf(Perl_debug_log,
- "### Saw string in '%s'\n", s);
+ "### Saw string before '%s'\n", s);
} )
if (PL_expect == XOPERATOR) {
if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
missingterm((char*)0);
yylval.ival = OP_CONST;
for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
- if (*d == '$' || *d == '@' || *d == '\\' || *d & 0x80) {
+ if (*d == '$' || *d == '@' || *d == '\\' || UTF8_IS_CONTINUED(*d)) {
yylval.ival = OP_STRINGIFY;
break;
}
case '`':
s = scan_str(s,FALSE,FALSE);
DEBUG_T( { PerlIO_printf(Perl_debug_log,
- "### Saw backtick string in '%s'\n", s);
+ "### Saw backtick string before '%s'\n", s);
} )
if (PL_expect == XOPERATOR)
no_op("Backticks",s);
/* If not a declared subroutine, it's an indirect object. */
/* (But it's an indir obj regardless for sort.) */
- if ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
+ if ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
((!gv || !GvCVu(gv)) &&
(PL_last_lop_op != OP_MAPSTART &&
PL_last_lop_op != OP_GREPSTART))))
(void)PerlIO_seek(PL_rsfp, 0L, 0);
}
if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
-#if defined(__BORLANDC__)
- /* XXX see note in do_binmode() */
- ((FILE*)PL_rsfp)->flags |= _F_BIN;
-#endif
if (loc > 0)
PerlIO_seek(PL_rsfp, loc, 0);
}
force_next(THING);
}
}
- if (PL_lex_stuff)
+ if (PL_lex_stuff) {
SvREFCNT_dec(PL_lex_stuff);
- PL_lex_stuff = Nullsv;
+ PL_lex_stuff = Nullsv;
+ }
PL_expect = XTERM;
TOKEN('(');
char *p;
s = scan_str(s,FALSE,FALSE);
- if (!s) {
- if (PL_lex_stuff)
- SvREFCNT_dec(PL_lex_stuff);
- PL_lex_stuff = Nullsv;
+ if (!s)
Perl_croak(aTHX_ "Prototype not terminated");
- }
/* strip spaces */
d = SvPVX(PL_lex_stuff);
tmp = 0;
*d++ = *s++;
*d++ = *s++;
}
- else if (UTF && *(U8*)s >= 0xc0 && isALNUM_utf8((U8*)s)) {
+ else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
char *t = s + UTF8SKIP(s);
- while (*t & 0x80 && is_utf8_mark((U8*)t))
+ while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
t += UTF8SKIP(t);
if (d + (t - s) > e)
Perl_croak(aTHX_ ident_too_long);
*d++ = *s++;
*d++ = *s++;
}
- else if (UTF && *(U8*)s >= 0xc0 && isALNUM_utf8((U8*)s)) {
+ else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
char *t = s + UTF8SKIP(s);
- while (*t & 0x80 && is_utf8_mark((U8*)t))
+ while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
t += UTF8SKIP(t);
if (d + (t - s) > e)
Perl_croak(aTHX_ ident_too_long);
e = s;
while ((e < send && isALNUM_lazy_if(e,UTF)) || *e == ':') {
e += UTF8SKIP(e);
- while (e < send && *e & 0x80 && is_utf8_mark((U8*)e))
+ while (e < send && UTF8_IS_CONTINUED(*e) && is_utf8_mark((U8*)e))
e += UTF8SKIP(e);
}
Copy(s, d, e - s, char);
char *s;
s = scan_str(start,FALSE,FALSE);
- if (!s) {
- if (PL_lex_stuff)
- SvREFCNT_dec(PL_lex_stuff);
- PL_lex_stuff = Nullsv;
+ if (!s)
Perl_croak(aTHX_ "Search pattern not terminated");
- }
pm = (PMOP*)newPMOP(type, 0);
if (PL_multi_open == '?')
s = scan_str(start,FALSE,FALSE);
- if (!s) {
- if (PL_lex_stuff)
- SvREFCNT_dec(PL_lex_stuff);
- PL_lex_stuff = Nullsv;
+ if (!s)
Perl_croak(aTHX_ "Substitution pattern not terminated");
- }
if (s[-1] == PL_multi_open)
s--;
first_start = PL_multi_start;
s = scan_str(s,FALSE,FALSE);
if (!s) {
- if (PL_lex_stuff)
+ if (PL_lex_stuff) {
SvREFCNT_dec(PL_lex_stuff);
- PL_lex_stuff = Nullsv;
- if (PL_lex_repl)
- SvREFCNT_dec(PL_lex_repl);
- PL_lex_repl = Nullsv;
+ PL_lex_stuff = Nullsv;
+ }
Perl_croak(aTHX_ "Substitution replacement not terminated");
}
PL_multi_start = first_start; /* so whole substitution is taken together */
I32 squash;
I32 del;
I32 complement;
- I32 utf8;
- I32 count = 0;
yylval.ival = OP_NULL;
s = scan_str(start,FALSE,FALSE);
- if (!s) {
- if (PL_lex_stuff)
- SvREFCNT_dec(PL_lex_stuff);
- PL_lex_stuff = Nullsv;
+ if (!s)
Perl_croak(aTHX_ "Transliteration pattern not terminated");
- }
if (s[-1] == PL_multi_open)
s--;
s = scan_str(s,FALSE,FALSE);
if (!s) {
- if (PL_lex_stuff)
+ if (PL_lex_stuff) {
SvREFCNT_dec(PL_lex_stuff);
- PL_lex_stuff = Nullsv;
- if (PL_lex_repl)
- SvREFCNT_dec(PL_lex_repl);
- PL_lex_repl = Nullsv;
+ PL_lex_stuff = Nullsv;
+ }
Perl_croak(aTHX_ "Transliteration replacement not terminated");
}
- New(803,tbl,256,short);
- o = newPVOP(OP_TRANS, 0, (char*)tbl);
-
complement = del = squash = 0;
while (strchr("cds", *s)) {
if (*s == 'c')
squash = OPpTRANS_SQUASH;
s++;
}
+
+ New(803, tbl, complement&&!del?258:256, short);
+ o = newPVOP(OP_TRANS, 0, (char*)tbl);
o->op_private = del|squash|complement|
(DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
(DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);
sv_setsv(PL_linestr,herewas);
PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
+ PL_last_lop = PL_last_uni = Nullch;
}
else
sv_setpvn(tmpstr,"",0); /* avoid "uninitialized" warning */
}
CopLINE_inc(PL_curcop);
PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
+ PL_last_lop = PL_last_uni = Nullch;
#ifndef PERL_STRICT_CR
if (PL_bufend - PL_linestart >= 2) {
if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
delimiter. It allows quoting of delimiters, and if the string has
balanced delimiters ([{<>}]) it allows nesting.
- The lexer always reads these strings into lex_stuff, except in the
- case of the operators which take *two* arguments (s/// and tr///)
- when it checks to see if lex_stuff is full (presumably with the 1st
- arg to s or tr) and if so puts the string into lex_repl.
-
+ On success, the SV with the resulting string is put into lex_stuff or,
+ if that is already non-NULL, into lex_repl. The second case occurs only
+ when parsing the RHS of the special constructs s/// and tr/// (y///).
+ For convenience, the terminating delimiter character is stuffed into
+ SvIVX of the SV.
*/
STATIC char *
/* after skipping whitespace, the next character is the terminator */
term = *s;
- if ((term & 0x80) && UTF)
+ if (UTF8_IS_CONTINUED(term) && UTF)
has_utf8 = TRUE;
/* mark where we are */
have found the terminator */
else if (*s == term)
break;
- else if (!has_utf8 && (*s & 0x80) && UTF)
+ else if (!has_utf8 && UTF8_IS_CONTINUED(*s) && UTF)
has_utf8 = TRUE;
*to = *s;
}
break;
else if (*s == PL_multi_open)
brackets++;
- else if (!has_utf8 && (*s & 0x80) && UTF)
+ else if (!has_utf8 && UTF8_IS_CONTINUED(*s) && UTF)
has_utf8 = TRUE;
*to = *s;
}
/* having changed the buffer, we must update PL_bufend */
PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
+ PL_last_lop = PL_last_uni = Nullch;
}
/* at this point, we have successfully read the delimited string */
while (isDIGIT(*pos) || *pos == '_')
pos++;
if (!isALPHA(*pos)) {
- UV rev;
+ UV rev, revmax = 0;
U8 tmpbuf[UTF8_MAXLEN+1];
U8 *tmpend;
- bool utf8 = FALSE;
s++; /* get past 'v' */
sv = NEWSV(92,5);
}
}
tmpend = uv_to_utf8(tmpbuf, rev);
- utf8 = utf8 || rev > 127;
+ if (rev > revmax)
+ revmax = rev;
sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
if (*pos == '.' && isDIGIT(pos[1]))
s = ++pos;
SvPOK_on(sv);
SvREADONLY_on(sv);
- if (utf8) {
+ if (revmax > 127) {
SvUTF8_on(sv);
- if (!UTF||IN_BYTE)
+ if (revmax < 256)
sv_utf8_downgrade(sv, TRUE);
}
}
s = filter_gets(PL_linestr, PL_rsfp, 0);
PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
PL_bufend = PL_bufptr + SvCUR(PL_linestr);
+ PL_last_lop = PL_last_uni = Nullch;
if (!s) {
s = PL_bufptr;
yyerror("Format not terminated");
return oldsavestack_ix;
}
+#ifdef __SC__
+#pragma segment Perl_yylex
+#endif
int
Perl_yywarn(pTHX_ char *s)
{
PL_in_my_stash = Nullhv;
return 0;
}
+#ifdef __SC__
+#pragma segment Main
+#endif
STATIC char*
S_swallow_bom(pTHX_ U8 *s)