X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=toke.c;h=458e2585135fe406760dbe57806a6b3edba4a03e;hb=76ced9add7b621dfc9d4ecb534aeea8e131a418a;hp=2d9680268334256fc5a89bf6b4ecd08b19845f29;hpb=e1992b6d91e50ab66a903c570e8d9c48f121f34b;p=p5sagit%2Fp5-mst-13.2.git diff --git a/toke.c b/toke.c index 2d96802..458e258 100644 --- a/toke.c +++ b/toke.c @@ -28,6 +28,10 @@ static char ident_too_long[] = "Identifier too long"; static void restore_rsfp(pTHXo_ void *f); +#ifndef PERL_NO_UTF16_FILTER +static I32 utf16_textfilter(pTHXo_ int idx, SV *sv, int maxlen); +static I32 utf16rev_textfilter(pTHXo_ int idx, SV *sv, int maxlen); +#endif #define XFAKEBRACK 128 #define XENUMMASK 127 @@ -39,6 +43,13 @@ static void restore_rsfp(pTHXo_ void *f); * 1999-02-27 mjd-perl-patch@plover.com */ #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x))) +/* On MacOS, respect nonbreaking spaces */ +#ifdef MACOS_TRADITIONAL +#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t') +#else +#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t') +#endif + /* LEX_* are values for PL_lex_state, the state of the lexer. * They are arranged oddly so that the guard on the switch statement * can get by with a single comparison (if the compiler is smart enough). @@ -69,15 +80,19 @@ static void restore_rsfp(pTHXo_ void *f); #endif #ifdef USE_PURE_BISON -YYSTYPE* yylval_pointer = NULL; -int* yychar_pointer = NULL; +# ifndef YYMAXLEVEL +# define YYMAXLEVEL 100 +# endif +YYSTYPE* yylval_pointer[YYMAXLEVEL]; +int* yychar_pointer[YYMAXLEVEL]; +int yyactlevel = 0; # undef yylval # undef yychar -# define yylval (*yylval_pointer) -# define yychar (*yychar_pointer) -# define PERL_YYLEX_PARAM yylval_pointer,yychar_pointer -# undef yylex -# define yylex() Perl_yylex(aTHX_ yylval_pointer, yychar_pointer) +# define yylval (*yylval_pointer[yyactlevel]) +# define yychar (*yychar_pointer[yyactlevel]) +# define PERL_YYLEX_PARAM yylval_pointer[yyactlevel],yychar_pointer[yyactlevel] +# undef yylex +# define yylex() Perl_yylex_r(aTHX_ yylval_pointer[yyactlevel],yychar_pointer[yyactlevel]) #endif #include "keywords.h" @@ -198,10 +213,8 @@ S_no_op(pTHX_ char *what, char *s) if (!s) s = oldbp; - else { - assert(s >= oldbp); + else PL_bufptr = s; - } yywarn(Perl_form(aTHX_ "%s found where operator expected", what)); if (is_first) Perl_warn(aTHX_ "\t(Missing semicolon on previous line?)\n"); @@ -212,8 +225,10 @@ S_no_op(pTHX_ char *what, char *s) Perl_warn(aTHX_ "\t(Do you need to predeclare %.*s?)\n", t - PL_oldoldbufptr, PL_oldoldbufptr); } - else + else { + assert(s >= oldbp); Perl_warn(aTHX_ "\t(Missing operator before %.*s?)\n", s - oldbp, oldbp); + } PL_bufptr = oldbp; } @@ -319,36 +334,6 @@ S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen) } #endif -#if 0 -STATIC I32 -S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen) -{ - I32 count = FILTER_READ(idx+1, sv, maxlen); - if (count) { - U8* tmps; - U8* tend; - New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8); - tend = utf16_to_utf8((U16*)SvPVX(sv), tmps, SvCUR(sv)); - sv_usepvn(sv, (char*)tmps, tend - tmps); - } - return count; -} - -STATIC I32 -S_utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen) -{ - I32 count = FILTER_READ(idx+1, sv, maxlen); - if (count) { - U8* tmps; - U8* tend; - New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8); - tend = utf16_to_utf8_reversed((U16*)SvPVX(sv), tmps, SvCUR(sv)); - sv_usepvn(sv, (char*)tmps, tend - tmps); - } - return count; -} -#endif - /* * Perl_lex_start * Initialize variables. Uses the Perl save_stack to save its state (for @@ -376,7 +361,6 @@ Perl_lex_start(pTHX_ SV *line) SAVEVPTR(PL_nextval[toke]); } SAVEI32(PL_nexttoke); - PL_nexttoke = 0; } SAVECOPLINE(PL_curcop); SAVEPPTR(PL_bufptr); @@ -410,6 +394,7 @@ Perl_lex_start(pTHX_ SV *line) PL_lex_stuff = Nullsv; PL_lex_repl = Nullsv; PL_lex_inpat = 0; + PL_nexttoke = 0; PL_lex_inwhat = 0; PL_sublex_info.sub_inwhat = 0; PL_linestr = line; @@ -463,7 +448,7 @@ S_incline(pTHX_ char *s) CopLINE_inc(PL_curcop); if (*s++ != '#') return; - while (*s == ' ' || *s == '\t') s++; + while (SPACE_OR_TAB(*s)) s++; if (strnEQ(s, "line", 4)) s += 4; else @@ -472,13 +457,13 @@ S_incline(pTHX_ char *s) s++; else return; - while (*s == ' ' || *s == '\t') s++; + while (SPACE_OR_TAB(*s)) s++; if (!isDIGIT(*s)) return; n = s; while (isDIGIT(*s)) s++; - while (*s == ' ' || *s == '\t') + while (SPACE_OR_TAB(*s)) s++; if (*s == '"' && (t = strchr(s+1, '"'))) { s++; @@ -488,15 +473,21 @@ S_incline(pTHX_ char *s) for (t = s; !isSPACE(*t); t++) ; e = t; } - while (*e == ' ' || *e == '\t' || *e == '\r' || *e == '\f') + while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f') e++; if (*e != '\n' && *e != '\0') return; /* false alarm */ ch = *t; *t = '\0'; - if (t - s > 0) + if (t - s > 0) { +#ifdef USE_ITHREADS + Safefree(CopFILE(PL_curcop)); +#else + SvREFCNT_dec(CopFILEGV(PL_curcop)); +#endif CopFILE_set(PL_curcop, s); + } *t = ch; CopLINE_set(PL_curcop, atoi(n)-1); } @@ -512,7 +503,7 @@ S_skipspace(pTHX_ register char *s) { dTHR; if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) { - while (s < PL_bufend && (*s == ' ' || *s == '\t')) + while (s < PL_bufend && SPACE_OR_TAB(*s)) s++; return s; } @@ -819,13 +810,13 @@ Perl_str_to_version(pTHX_ SV *sv) NV nshift = 1.0; STRLEN len; char *start = SvPVx(sv,len); - bool utf = SvUTF8(sv); + bool utf = SvUTF8(sv) ? TRUE : FALSE; char *end = start + len; while (start < end) { - I32 skip; + STRLEN skip; UV n; if (utf) - n = utf8_to_uv((U8*)start, &skip); + n = utf8_to_uv((U8*)start, len, &skip, 0); else { n = *(U8*)start; skip = 1; @@ -857,7 +848,7 @@ S_force_version(pTHX_ char *s) for (; isDIGIT(*d) || *d == '_' || *d == '.'; d++); if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) { SV *ver; - s = scan_num(s); + s = scan_num(s, &yylval); version = yylval.opval; ver = cSVOPx(version)->op_sv; if (SvPOK(ver) && !SvNIOK(ver)) { @@ -896,7 +887,7 @@ S_tokeq(pTHX_ SV *sv) goto finish; s = SvPV_force(sv, len); - if (SvIVX(sv) == -1) + if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1) goto finish; send = s + len; while (s < send && *s != '\\') @@ -974,6 +965,8 @@ S_sublex_start(pTHX) p = SvPV(sv, len); nsv = newSVpvn(p, len); + if (SvUTF8(sv)) + SvUTF8_on(nsv); SvREFCNT_dec(sv); sv = nsv; } @@ -1193,14 +1186,14 @@ S_scan_const(pTHX_ char *start) register char *s = start; /* start of the constant */ register char *d = SvPVX(sv); /* destination for copies */ bool dorange = FALSE; /* are we in a translit range? */ - bool has_utf = FALSE; /* embedded \x{} */ - I32 len; /* ? */ + bool didrange = FALSE; /* did we just finish a range? */ + bool has_utf8 = FALSE; /* embedded \x{} */ UV uv; I32 utf = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) ? (PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF)) : UTF; - I32 thisutf = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) + I32 this_utf8 = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) ? (PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF)) : UTF; @@ -1226,6 +1219,12 @@ S_scan_const(pTHX_ char *start) min = (U8)*d; /* first char in range */ max = (U8)d[1]; /* last char in range */ + if (min > max) { + Perl_croak(aTHX_ + "Invalid [] range \"%c-%c\" in transliteration operator", + (char)min, (char)max); + } + #ifndef ASCIIish if ((isLOWER(min) && isLOWER(max)) || (isUPPER(min) && isUPPER(max))) { @@ -1246,11 +1245,15 @@ S_scan_const(pTHX_ char *start) /* mark the range as done, and continue */ dorange = FALSE; + didrange = TRUE; continue; - } + } /* range begins (ignore - as first or last char) */ else if (*s == '-' && s+1 < send && s != start) { + if (didrange) { + Perl_croak(aTHX_ "Ambiguous range in transliteration operator"); + } if (utf) { *d++ = (char)0xff; /* use illegal utf8 byte--see pmtrans */ s++; @@ -1259,6 +1262,9 @@ S_scan_const(pTHX_ char *start) dorange = TRUE; s++; } + else { + didrange = FALSE; + } } /* if we get here, we're not doing a transliteration */ @@ -1302,9 +1308,11 @@ S_scan_const(pTHX_ char *start) *d++ = *s++; } - /* check for embedded arrays (@foo, @:foo, @'foo, @{foo}, @$foo) */ + /* check for embedded arrays + (@foo, @:foo, @'foo, @{foo}, @$foo, @+, @-) + */ else if (*s == '@' && s[1] - && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$", s[1]))) + && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1]))) break; /* check for embedded scalars. only stop if we're sure it's a @@ -1319,21 +1327,24 @@ S_scan_const(pTHX_ char *start) /* (now in tr/// code again) */ - if (*s & 0x80 && thisutf) { - (void)utf8_to_uv((U8*)s, &len); - if (len == 1) { - /* illegal UTF8, make it valid */ - char *old_pvx = SvPVX(sv); - /* need space for one extra char (NOTE: SvCUR() not set here) */ - d = SvGROW(sv, SvLEN(sv) + 1) + (d - old_pvx); - d = (char*)uv_to_utf8((U8*)d, (U8)*s++); - } - else { - while (len--) - *d++ = *s++; - } - has_utf = TRUE; - continue; + if (*s & 0x80 && this_utf8) { + STRLEN len; + UV uv; + + uv = utf8_to_uv((U8*)s, send - s, &len, UTF8_CHECK_ONLY); + if (len == 1) { + /* Illegal UTF8 (a high-bit byte), make it valid. */ + char *old_pvx = SvPVX(sv); + /* need space for one extra char (NOTE: SvCUR() not set here) */ + d = SvGROW(sv, SvLEN(sv) + 1) + (d - old_pvx); + d = (char*)uv_to_utf8((U8*)d, (U8)*s++); + } + else { + while (len--) + *d++ = *s++; + } + has_utf8 = TRUE; + continue; } /* backslashes */ @@ -1377,7 +1388,7 @@ S_scan_const(pTHX_ char *start) default: { dTHR; - if (ckWARN(WARN_MISC) && isALPHA(*s)) + if (ckWARN(WARN_MISC) && isALNUM(*s)) Perl_warner(aTHX_ WARN_MISC, "Unrecognized escape \\%c passed through", *s); @@ -1389,8 +1400,11 @@ S_scan_const(pTHX_ char *start) /* \132 indicates an octal constant */ case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': - uv = (UV)scan_oct(s, 3, &len); - s += len; + { + STRLEN len = 0; /* disallow underscores */ + uv = (UV)scan_oct(s, 3, &len); + s += len; + } goto NUM_ESCAPE_INSERT; /* \x24 indicates a hex constant */ @@ -1402,12 +1416,19 @@ S_scan_const(pTHX_ char *start) yyerror("Missing right brace on \\x{}"); e = s; } - uv = (UV)scan_hex(s + 1, e - s - 1, &len); - s = e + 1; + else { + STRLEN len = 1; /* allow underscores */ + uv = (UV)scan_hex(s + 1, e - s - 1, &len); + has_utf8 = TRUE; + } + s = e + 1; } else { - uv = (UV)scan_hex(s, 2, &len); - s += len; + { + STRLEN len = 0; /* disallow underscores */ + uv = (UV)scan_hex(s, 2, &len); + s += len; + } } NUM_ESCAPE_INSERT: @@ -1415,8 +1436,8 @@ S_scan_const(pTHX_ char *start) * There will always enough room in sv since such escapes will * be longer than any utf8 sequence they can end up as */ - if (uv > 127) { - if (!thisutf && !has_utf && uv > 255) { + if (uv > 127 || has_utf8) { + if (!this_utf8 && !has_utf8 && uv > 255) { /* might need to recode whatever we have accumulated so far * if it contains any hibit chars */ @@ -1448,9 +1469,9 @@ S_scan_const(pTHX_ char *start) } } - if (thisutf || uv > 255) { + if (has_utf8 || uv > 255) { d = (char*)uv_to_utf8((U8*)d, uv); - has_utf = TRUE; + this_utf8 = TRUE; } else { *d++ = (char)uv; @@ -1479,8 +1500,17 @@ S_scan_const(pTHX_ char *start) res = new_constant( Nullch, 0, "charnames", res, Nullsv, "\\N{...}" ); str = SvPV(res,len); - if (len > 1) - has_utf = TRUE; + if (!has_utf8 && SvUTF8(res)) { + char *ostart = SvPVX(sv); + SvCUR_set(sv, d - ostart); + SvPOK_on(sv); + *d = '\0'; + sv_utf8_upgrade(sv); + /* this just broke our allocation above... */ + SvGROW(sv, send - start); + d = SvPVX(sv) + SvCUR(sv); + has_utf8 = TRUE; + } if (len > e - s + 4) { char *odest = SvPVX(sv); @@ -1504,10 +1534,13 @@ S_scan_const(pTHX_ char *start) *d = *s++; if (isLOWER(*d)) *d = toUPPER(*d); - *d++ = toCTRL(*d); + *d = toCTRL(*d); + d++; #else - len = *s++; - *d++ = toCTRL(len); + { + U8 c = *s++; + *d++ = toCTRL(c); + } #endif continue; @@ -1555,7 +1588,7 @@ S_scan_const(pTHX_ char *start) *d = '\0'; SvCUR_set(sv, d - SvPVX(sv)); SvPOK_on(sv); - if (has_utf) + if (has_utf8) SvUTF8_on(sv); /* shrink the sv if we allocated more than we used */ @@ -1850,7 +1883,7 @@ S_incl_perldb(pTHX) * store private buffers and state information. * * The supplied datasv parameter is upgraded to a PVIO type - * and the IoDIRP field is used to store the function pointer, + * and the IoDIRP/IoANY field is used to store the function pointer, * and IOf_FAKE_DIRP is enabled on datasv to mark this as such. * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for * private use must be set using malloc'd pointers. @@ -1868,7 +1901,7 @@ Perl_filter_add(pTHX_ filter_t funcp, SV *datasv) datasv = NEWSV(255,0); if (!SvUPGRADE(datasv, SVt_PVIO)) Perl_die(aTHX_ "Can't upgrade filter_add data to SVt_PVIO"); - IoDIRP(datasv) = (DIR*)funcp; /* stash funcp into spare field */ + IoANY(datasv) = (void *)funcp; /* stash funcp into spare field */ IoFLAGS(datasv) |= IOf_FAKE_DIRP; DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n", funcp, SvPV_nolen(datasv))); @@ -1888,9 +1921,9 @@ Perl_filter_del(pTHX_ filter_t funcp) return; /* if filter is on top of stack (usual case) just pop it off */ datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters)); - if (IoDIRP(datasv) == (DIR*)funcp) { + if (IoANY(datasv) == (void *)funcp) { IoFLAGS(datasv) &= ~IOf_FAKE_DIRP; - IoDIRP(datasv) = (DIR*)NULL; + IoANY(datasv) = (void *)NULL; sv_free(av_pop(PL_rsfp_filters)); return; @@ -1950,7 +1983,7 @@ Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen) return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */ } /* Get function pointer hidden within datasv */ - funcp = (filter_t)IoDIRP(datasv); + funcp = (filter_t)IoANY(datasv); DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_read %d: via function %p (%s)\n", idx, funcp, SvPV_nolen(datasv))); @@ -1981,6 +2014,31 @@ S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append) return (sv_gets(sv, fp, append)); } +STATIC HV * +S_find_in_my_stash(pTHX_ char *pkgname, I32 len) +{ + GV *gv; + + if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__")) + return PL_curstash; + + if (len > 2 && + (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') && + (gv = gv_fetchpv(pkgname, FALSE, SVt_PVHV))) + { + return GvHV(gv); /* Foo:: */ + } + + /* use constant CLASS => 'MyClass' */ + if ((gv = gv_fetchpv(pkgname, FALSE, SVt_PVCV))) { + SV *sv; + if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) { + pkgname = SvPV_nolen(sv); + } + } + + return gv_stashpv(pkgname, FALSE); +} #ifdef DEBUGGING static char* exp_name[] = @@ -2014,6 +2072,34 @@ S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append) if we already built the token before, use it. */ +#ifdef USE_PURE_BISON +#ifdef __SC__ +#pragma segment Perl_yylex_r +#endif +int +Perl_yylex_r(pTHX_ YYSTYPE *lvalp, int *lcharp) +{ + dTHR; + int r; + + yylval_pointer[yyactlevel] = lvalp; + yychar_pointer[yyactlevel] = lcharp; + yyactlevel++; + if (yyactlevel >= YYMAXLEVEL) + Perl_croak(aTHX_ "panic: YYMAXLEVEL"); + + r = Perl_yylex(aTHX); + + yyactlevel--; + + return r; +} +#endif + +#ifdef __SC__ +#pragma segment Perl_yylex +#endif + int #ifdef USE_PURE_BISON Perl_yylex(pTHX_ YYSTYPE *lvalp, int *lcharp) @@ -2029,11 +2115,6 @@ Perl_yylex(pTHX) GV *gv = Nullgv; GV **gvp = 0; -#ifdef USE_PURE_BISON - yylval_pointer = lvalp; - yychar_pointer = lcharp; -#endif - /* check if there's an identifier for us to look at */ if (PL_pending_ident) { /* pit holds the identifier we read and pending_ident is reset */ @@ -2139,9 +2220,14 @@ Perl_yylex(pTHX) */ if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) { GV *gv = gv_fetchpv(PL_tokenbuf+1, FALSE, SVt_PVAV); - if (!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv))) - yyerror(Perl_form(aTHX_ "In string, %s now must be written as \\%s", - PL_tokenbuf, PL_tokenbuf)); + if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv))) + && ckWARN(WARN_AMBIGUOUS)) + { + /* Downgraded from fatal to warning 20000522 mjd */ + Perl_warner(aTHX_ WARN_AMBIGUOUS, + "Possible unintended interpolation of %s in string", + PL_tokenbuf); + } } /* build ops for a bareword */ @@ -2434,7 +2520,34 @@ Perl_yylex(pTHX) goto retry; } do { - if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == Nullch) { + bool bof = PL_rsfp ? TRUE : FALSE; + if (bof) { +#ifdef PERLIO_IS_STDIO +# ifdef __GNU_LIBRARY__ +# if __GNU_LIBRARY__ == 1 /* Linux glibc5 */ +# define FTELL_FOR_PIPE_IS_BROKEN +# endif +# else +# ifdef __GLIBC__ +# if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */ +# define FTELL_FOR_PIPE_IS_BROKEN +# endif +# endif +# endif +#endif +#ifdef FTELL_FOR_PIPE_IS_BROKEN + /* This loses the possibility to detect the bof + * situation on perl -P when the libc5 is being used. + * Workaround? Maybe attach some extra state to PL_rsfp? + */ + if (!PL_preprocess) + bof = PerlIO_tell(PL_rsfp) == 0; +#else + bof = PerlIO_tell(PL_rsfp) == 0; +#endif + } + s = filter_gets(PL_linestr, PL_rsfp, 0); + if (s == Nullch) { fake_eof: if (PL_rsfp) { if (PL_preprocess && !PL_in_eval) @@ -2457,6 +2570,9 @@ Perl_yylex(pTHX) PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr); sv_setpv(PL_linestr,""); TOKEN(';'); /* not infinite loop because rsfp is NULL now */ + } else if (bof) { + PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); + s = swallow_bom((U8*)s); } if (PL_doextract) { if (*s == '#' && s[1] == '!' && instr(s,"perl")) @@ -2469,7 +2585,7 @@ Perl_yylex(pTHX) PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); PL_doextract = FALSE; } - } + } incline(s); } while (PL_doextract); PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s; @@ -2569,6 +2685,7 @@ Perl_yylex(pTHX) *s = '#'; /* Don't try to parse shebang line */ } #endif /* ALTERNATE_SHEBANG */ +#ifndef MACOS_TRADITIONAL if (!d && *s == '#' && ipathend > ipath && @@ -2596,13 +2713,14 @@ Perl_yylex(pTHX) PerlProc_execv(ipath, newargv); Perl_croak(aTHX_ "Can't exec %s", ipath); } +#endif if (d) { U32 oldpdb = PL_perldb; bool oldn = PL_minus_n; bool oldp = PL_minus_p; while (*d && !isSPACE(*d)) d++; - while (*d == ' ' || *d == '\t') d++; + while (SPACE_OR_TAB(*d)) d++; if (*d++ == '-') { do { @@ -2641,14 +2759,22 @@ Perl_yylex(pTHX) #ifdef PERL_STRICT_CR Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r'); Perl_croak(aTHX_ - "(Maybe you didn't strip carriage returns after a network transfer?)\n"); + "\t(Maybe you didn't strip carriage returns after a network transfer?)\n"); #endif case ' ': case '\t': case '\f': case 013: +#ifdef MACOS_TRADITIONAL + case '\312': +#endif s++; goto retry; case '#': case '\n': if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) { + if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) { + /* handle eval qq[#line 1 "foo"\n ...] */ + CopLINE_dec(PL_curcop); + incline(s); + } d = PL_bufend; while (s < d && *s != '\n') s++; @@ -2672,7 +2798,7 @@ Perl_yylex(pTHX) PL_bufptr = s; tmp = *s++; - while (s < PL_bufend && (*s == ' ' || *s == '\t')) + while (s < PL_bufend && SPACE_OR_TAB(*s)) s++; if (strnEQ(s,"=>",2)) { @@ -2911,8 +3037,7 @@ Perl_yylex(pTHX) PL_expect = XTERM; TOKEN('('); case ';': - if (CopLINE(PL_curcop) < PL_copline) - PL_copline = CopLINE(PL_curcop); + CLINE; tmp = *s++; OPERATOR(tmp); case ')': @@ -2956,20 +3081,20 @@ Perl_yylex(pTHX) PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR; OPERATOR(HASHBRACK); case XOPERATOR: - while (s < PL_bufend && (*s == ' ' || *s == '\t')) + while (s < PL_bufend && SPACE_OR_TAB(*s)) s++; d = s; PL_tokenbuf[0] = '\0'; if (d < PL_bufend && *d == '-') { PL_tokenbuf[0] = '-'; d++; - while (d < PL_bufend && (*d == ' ' || *d == '\t')) + while (d < PL_bufend && SPACE_OR_TAB(*d)) d++; } if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) { d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE, &len); - while (d < PL_bufend && (*d == ' ' || *d == '\t')) + while (d < PL_bufend && SPACE_OR_TAB(*d)) d++; if (*d == '}') { char minus = (PL_tokenbuf[0] == '-'); @@ -3089,7 +3214,7 @@ Perl_yylex(pTHX) yyerror("Unmatched right curly bracket"); else PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets]; - if (PL_lex_brackets < PL_lex_formbrack) + if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL) PL_lex_formbrack = 0; if (PL_lex_state == LEX_INTERPNORMAL) { if (PL_lex_brackets == 0) { @@ -3186,9 +3311,9 @@ Perl_yylex(pTHX) if (PL_lex_brackets < PL_lex_formbrack) { char *t; #ifdef PERL_STRICT_CR - for (t = s; *t == ' ' || *t == '\t'; t++) ; + for (t = s; SPACE_OR_TAB(*t); t++) ; #else - for (t = s; *t == ' ' || *t == '\t' || *t == '\r'; t++) ; + for (t = s; SPACE_OR_TAB(*t) || *t == '\r'; t++) ; #endif if (*t == '\n' || *t == '#') { s--; @@ -3278,7 +3403,7 @@ Perl_yylex(pTHX) /* This kludge not intended to be bulletproof. */ if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) { yylval.opval = newSVOP(OP_CONST, 0, - newSViv((IV)PL_compiling.cop_arybase)); + newSViv(PL_compiling.cop_arybase)); yylval.opval->op_private = OPpCONST_ARYBASE; TERM(THING); } @@ -3456,7 +3581,7 @@ Perl_yylex(pTHX) /* FALL THROUGH */ case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': - s = scan_num(s); + s = scan_num(s, &yylval); if (PL_expect == XOPERATOR) no_op("Number",s); TERM(THING); @@ -3526,7 +3651,7 @@ Perl_yylex(pTHX) while (isDIGIT(*start) || *start == '_') start++; if (*start == '.' && isDIGIT(start[1])) { - s = scan_num(s); + s = scan_num(s, &yylval); TERM(THING); } /* avoid v123abc() or $h{v1}, allow C */ @@ -3537,7 +3662,7 @@ Perl_yylex(pTHX) gv = gv_fetchpv(s, FALSE, SVt_PVCV); *start = c; if (!gv) { - s = scan_num(s); + s = scan_num(s, &yylval); TERM(THING); } } @@ -3612,7 +3737,7 @@ Perl_yylex(pTHX) tmp = keyword(PL_tokenbuf, len); /* Is this a word before a => operator? */ - if (strnEQ(d,"=>",2)) { + if (*d == '=' && d[1] == '>') { CLINE; yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf,0)); yylval.opval->op_private = OPpCONST_BARE; @@ -3767,14 +3892,22 @@ Perl_yylex(pTHX) } } - /* If followed by a paren, it's certainly a subroutine. */ PL_expect = XOPERATOR; s = skipspace(s); + + /* Is this a word before a => operator? */ + if (*s == '=' && s[1] == '>') { + CLINE; + sv_setpv(((SVOP*)yylval.opval)->op_sv, PL_tokenbuf); + TERM(WORD); + } + + /* If followed by a paren, it's certainly a subroutine. */ if (*s == '(') { CLINE; if (gv && GvCVu(gv)) { - for (d = s + 1; *d == ' ' || *d == '\t'; d++) ; + for (d = s + 1; SPACE_OR_TAB(*d); d++) ; if (*d == ')' && (sv = cv_const_sv(GvCV(gv)))) { s = d + 1; goto its_constant; @@ -3911,11 +4044,11 @@ Perl_yylex(pTHX) /* Mark this internal pseudo-handle as clean */ IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT; if (PL_preprocess) - IoTYPE(GvIOp(gv)) = '|'; + IoTYPE(GvIOp(gv)) = IoTYPE_PIPE; else if ((PerlIO*)PL_rsfp == PerlIO_stdin()) - IoTYPE(GvIOp(gv)) = '-'; + IoTYPE(GvIOp(gv)) = IoTYPE_STD; else - IoTYPE(GvIOp(gv)) = '<'; + IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY; #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS) /* if the script was opened in binmode, we need to revert * it to text mode for compatibility; but only iff it has CRs @@ -3924,7 +4057,7 @@ Perl_yylex(pTHX) && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r') { Off_t loc = 0; - if (IoTYPE(GvIOp(gv)) == '<') { + if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) { loc = PerlIO_tell(PL_rsfp); (void)PerlIO_seek(PL_rsfp, 0L, 0); } @@ -4360,7 +4493,7 @@ Perl_yylex(pTHX) s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len); if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3)) goto really_sub; - PL_in_my_stash = gv_stashpv(PL_tokenbuf, FALSE); + PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len); if (!PL_in_my_stash) { char tmpbuf[1024]; PL_bufptr = s; @@ -4489,7 +4622,7 @@ Perl_yylex(pTHX) for (; !isSPACE(*d) && len; --len, ++d) ; } words = append_elem(OP_LIST, words, - newSVOP(OP_CONST, 0, newSVpvn(b, d-b))); + newSVOP(OP_CONST, 0, tokeq(newSVpvn(b, d-b)))); } } if (words) { @@ -4968,6 +5101,9 @@ Perl_yylex(pTHX) } }} } +#ifdef __SC__ +#pragma segment Main +#endif I32 Perl_keyword(pTHX_ register char *d, I32 len) @@ -5020,12 +5156,12 @@ Perl_keyword(pTHX_ register char *d, I32 len) if (strEQ(d,"cos")) return -KEY_cos; break; case 4: - if (strEQ(d,"chop")) return KEY_chop; + if (strEQ(d,"chop")) return -KEY_chop; break; case 5: if (strEQ(d,"close")) return -KEY_close; if (strEQ(d,"chdir")) return -KEY_chdir; - if (strEQ(d,"chomp")) return KEY_chomp; + if (strEQ(d,"chomp")) return -KEY_chomp; if (strEQ(d,"chmod")) return -KEY_chmod; if (strEQ(d,"chown")) return -KEY_chown; if (strEQ(d,"crypt")) return -KEY_crypt; @@ -5070,7 +5206,6 @@ Perl_keyword(pTHX_ register char *d, I32 len) } break; case 'E': - if (strEQ(d,"EQ")) { deprecate(d); return -KEY_eq;} if (strEQ(d,"END")) return KEY_END; break; case 'e': @@ -5087,7 +5222,7 @@ Perl_keyword(pTHX_ register char *d, I32 len) if (strEQ(d,"exit")) return -KEY_exit; if (strEQ(d,"eval")) return KEY_eval; if (strEQ(d,"exec")) return -KEY_exec; - if (strEQ(d,"each")) return KEY_each; + if (strEQ(d,"each")) return -KEY_each; break; case 5: if (strEQ(d,"elsif")) return KEY_elsif; @@ -5136,12 +5271,6 @@ Perl_keyword(pTHX_ register char *d, I32 len) break; } break; - case 'G': - if (len == 2) { - if (strEQ(d,"GT")) { deprecate(d); return -KEY_gt;} - if (strEQ(d,"GE")) { deprecate(d); return -KEY_ge;} - } - break; case 'g': if (strnEQ(d,"get",3)) { d += 3; @@ -5237,16 +5366,10 @@ Perl_keyword(pTHX_ register char *d, I32 len) break; case 'k': if (len == 4) { - if (strEQ(d,"keys")) return KEY_keys; + if (strEQ(d,"keys")) return -KEY_keys; if (strEQ(d,"kill")) return -KEY_kill; } break; - case 'L': - if (len == 2) { - if (strEQ(d,"LT")) { deprecate(d); return -KEY_lt;} - if (strEQ(d,"LE")) { deprecate(d); return -KEY_le;} - } - break; case 'l': switch (len) { case 2: @@ -5298,9 +5421,6 @@ Perl_keyword(pTHX_ register char *d, I32 len) break; } break; - case 'N': - if (strEQ(d,"NE")) { deprecate(d); return -KEY_ne;} - break; case 'n': if (strEQ(d,"next")) return KEY_next; if (strEQ(d,"ne")) return -KEY_ne; @@ -5328,11 +5448,11 @@ Perl_keyword(pTHX_ register char *d, I32 len) case 'p': switch (len) { case 3: - if (strEQ(d,"pop")) return KEY_pop; + if (strEQ(d,"pop")) return -KEY_pop; if (strEQ(d,"pos")) return KEY_pos; break; case 4: - if (strEQ(d,"push")) return KEY_push; + if (strEQ(d,"push")) return -KEY_push; if (strEQ(d,"pack")) return -KEY_pack; if (strEQ(d,"pipe")) return -KEY_pipe; break; @@ -5439,7 +5559,7 @@ Perl_keyword(pTHX_ register char *d, I32 len) case 'h': switch (len) { case 5: - if (strEQ(d,"shift")) return KEY_shift; + if (strEQ(d,"shift")) return -KEY_shift; break; case 6: if (strEQ(d,"shmctl")) return -KEY_shmctl; @@ -5468,7 +5588,7 @@ Perl_keyword(pTHX_ register char *d, I32 len) case 'p': if (strEQ(d,"split")) return KEY_split; if (strEQ(d,"sprintf")) return -KEY_sprintf; - if (strEQ(d,"splice")) return KEY_splice; + if (strEQ(d,"splice")) return -KEY_splice; break; case 'q': if (strEQ(d,"sqrt")) return -KEY_sqrt; @@ -5548,7 +5668,7 @@ Perl_keyword(pTHX_ register char *d, I32 len) if (strEQ(d,"unlink")) return -KEY_unlink; break; case 7: - if (strEQ(d,"unshift")) return KEY_unshift; + if (strEQ(d,"unshift")) return -KEY_unshift; if (strEQ(d,"ucfirst")) return -KEY_ucfirst; break; } @@ -5648,30 +5768,37 @@ S_new_constant(pTHX_ char *s, STRLEN len, const char *key, SV *sv, SV *pv, SV *res; SV **cvp; SV *cv, *typesv; - const char *why, *why1, *why2; + const char *why1, *why2, *why3; - if (!(PL_hints & HINT_LOCALIZE_HH)) { + if (!table || !(PL_hints & HINT_LOCALIZE_HH)) { SV *msg; - why = "%^H is not localized"; - report_short: - why1 = why2 = ""; + why2 = strEQ(key,"charnames") + ? "(possibly a missing \"use charnames ...\")" + : ""; + msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s", + (type ? type: "undef"), why2); + + /* This is convoluted and evil ("goto considered harmful") + * but I do not understand the intricacies of all the different + * failure modes of %^H in here. The goal here is to make + * the most probable error message user-friendly. --jhi */ + + goto msgdone; + report: - msg = Perl_newSVpvf(aTHX_ "constant(%s): %s%s%s", - (type ? type: "undef"), why1, why2, why); + msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s", + (type ? type: "undef"), why1, why2, why3); + msgdone: yyerror(SvPVX(msg)); SvREFCNT_dec(msg); return sv; } - if (!table) { - why = "%^H is not defined"; - goto report_short; - } cvp = hv_fetch(table, key, strlen(key), FALSE); if (!cvp || !SvOK(*cvp)) { - why = "} is not defined"; why1 = "$^H{"; why2 = key; + why3 = "} is not defined"; goto report; } sv_2mortal(sv); /* Parent created it permanently */ @@ -5688,13 +5815,12 @@ S_new_constant(pTHX_ char *s, STRLEN len, const char *key, SV *sv, SV *pv, SAVETMPS; PUSHMARK(SP) ; - EXTEND(sp, 4); + EXTEND(sp, 3); if (pv) PUSHs(pv); PUSHs(sv); if (pv) PUSHs(typesv); - PUSHs(cv); PUTBACK; call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL)); @@ -5719,9 +5845,9 @@ S_new_constant(pTHX_ char *s, STRLEN len, const char *key, SV *sv, SV *pv, POPSTACK; if (!SvOK(res)) { - why = "}} did not return a defined value"; why1 = "Call to &{$^H{"; why2 = key; + why3 = "}} did not return a defined value"; sv = res; goto report; } @@ -5843,7 +5969,7 @@ S_scan_ident(pTHX_ register char *s, register char *send, char *dest, STRLEN des if (isSPACE(s[-1])) { while (s < send) { char ch = *s++; - if (ch != ' ' && ch != '\t') { + if (!SPACE_OR_TAB(ch)) { *d = ch; break; } @@ -5869,7 +5995,7 @@ S_scan_ident(pTHX_ register char *s, register char *send, char *dest, STRLEN des Perl_croak(aTHX_ ident_too_long); } *d = '\0'; - while (s < send && (*s == ' ' || *s == '\t')) s++; + while (s < send && SPACE_OR_TAB(*s)) s++; if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) { dTHR; /* only for ckWARN */ if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest)) { @@ -6079,45 +6205,20 @@ S_scan_trans(pTHX_ char *start) Perl_croak(aTHX_ "Transliteration replacement not terminated"); } - if (UTF) { - o = newSVOP(OP_TRANS, 0, 0); - utf8 = OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF; - } - else { - New(803,tbl,256,short); - o = newPVOP(OP_TRANS, 0, (char*)tbl); - utf8 = 0; - } + New(803,tbl,256,short); + o = newPVOP(OP_TRANS, 0, (char*)tbl); complement = del = squash = 0; - while (strchr("cdsCU", *s)) { + while (strchr("cds", *s)) { if (*s == 'c') complement = OPpTRANS_COMPLEMENT; else if (*s == 'd') del = OPpTRANS_DELETE; else if (*s == 's') squash = OPpTRANS_SQUASH; - else { - switch (count++) { - case 0: - if (*s == 'C') - utf8 &= ~OPpTRANS_FROM_UTF; - else - utf8 |= OPpTRANS_FROM_UTF; - break; - case 1: - if (*s == 'C') - utf8 &= ~OPpTRANS_TO_UTF; - else - utf8 |= OPpTRANS_TO_UTF; - break; - default: - Perl_croak(aTHX_ "Too many /C and /U options"); - } - } s++; } - o->op_private = del|squash|complement|utf8; + o->op_private = del|squash|complement; PL_lex_op = o; yylval.ival = OP_TRANS; @@ -6143,7 +6244,7 @@ S_scan_heredoc(pTHX_ register char *s) e = PL_tokenbuf + sizeof PL_tokenbuf - 1; if (!outer) *d++ = '\n'; - for (peek = s; *peek == ' ' || *peek == '\t'; peek++) ; + for (peek = s; SPACE_OR_TAB(*peek); peek++) ; if (*peek && strchr("`'\"",*peek)) { s = peek; term = *s++; @@ -6486,7 +6587,7 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims) register char term; /* terminating character */ register char *to; /* current position in the sv's data */ I32 brackets = 1; /* bracket nesting level */ - bool has_utf = FALSE; /* is there any utf8 content? */ + bool has_utf8 = FALSE; /* is there any utf8 content? */ /* skip space before the delimiter */ if (isSPACE(*s)) @@ -6498,7 +6599,7 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims) /* after skipping whitespace, the next character is the terminator */ term = *s; if ((term & 0x80) && UTF) - has_utf = TRUE; + has_utf8 = TRUE; /* mark where we are */ PL_multi_start = CopLINE(PL_curcop); @@ -6544,8 +6645,8 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims) have found the terminator */ else if (*s == term) break; - else if (!has_utf && (*s & 0x80) && UTF) - has_utf = TRUE; + else if (!has_utf8 && (*s & 0x80) && UTF) + has_utf8 = TRUE; *to = *s; } } @@ -6573,8 +6674,8 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims) break; else if (*s == PL_multi_open) brackets++; - else if (!has_utf && (*s & 0x80) && UTF) - has_utf = TRUE; + else if (!has_utf8 && (*s & 0x80) && UTF) + has_utf8 = TRUE; *to = *s; } } @@ -6634,7 +6735,7 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims) if (keep_delims) sv_catpvn(sv, s, 1); - if (has_utf) + if (has_utf8) SvUTF8_on(sv); PL_multi_end = CopLINE(PL_curcop); s++; @@ -6679,12 +6780,12 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims) */ char * -Perl_scan_num(pTHX_ char *start) +Perl_scan_num(pTHX_ char *start, YYSTYPE* lvalp) { register char *s = start; /* current position in buffer */ register char *d; /* destination in temp buffer */ register char *e; /* end of temp buffer */ - NV value; /* number read, as a double */ + NV nv; /* number read, as a double */ SV *sv = Nullsv; /* place to put the converted number */ bool floatit; /* boolean: int or float? */ char *lastub = 0; /* position of last underbar */ @@ -6941,39 +7042,8 @@ Perl_scan_num(pTHX_ char *start) /* make an sv from the string */ sv = NEWSV(92,0); - /* unfortunately this monster needs to be on one line or - makedepend will be confused. */ -#if (defined(USE_64_BIT_INT) && (!defined(HAS_STRTOLL)|| !defined(HAS_STRTOULL))) || (!defined(USE_64_BIT_INT) && (!defined(HAS_STRTOL) || !defined(HAS_STRTOUL))) - - /* - No working strto[u]l[l]. Since atoi() doesn't do range checks, - we need to do this the hard way. - */ - - value = Atof(PL_tokenbuf); +#if defined(Strtol) && defined(Strtoul) - /* - See if we can make do with an integer value without loss of - precision. We use I_V to cast to an int, because some - compilers have issues. Then we try casting it back and see - if it was the same. We only do this if we know we - specifically read an integer. - - Note: if floatit is true, then we don't need to do the - conversion at all. - */ - { - UV tryuv = U_V(value); - if (!floatit && (NV)tryuv == value) { - if (tryuv <= IV_MAX) - sv_setiv(sv, (IV)tryuv); - else - sv_setuv(sv, tryuv); - } - else - sv_setnv(sv, value); - } -#else /* strtol/strtoll sets errno to ERANGE if the number is too big for an integer. We try to do an integer conversion first @@ -6989,15 +7059,66 @@ Perl_scan_num(pTHX_ char *start) else uv = Strtoul(PL_tokenbuf, (char**)NULL, 10); if (errno) - floatit = TRUE; /* probably just too large */ + floatit = TRUE; /* Probably just too large. */ else if (*PL_tokenbuf == '-') sv_setiv(sv, iv); + else if (uv <= IV_MAX) + sv_setiv(sv, uv); /* Prefer IVs over UVs. */ else sv_setuv(sv, uv); } if (floatit) { - value = Atof(PL_tokenbuf); - sv_setnv(sv, value); + nv = Atof(PL_tokenbuf); + sv_setnv(sv, nv); + } +#else + /* + No working strtou?ll?. + + Unfortunately atol() doesn't do range checks (returning + LONG_MIN/LONG_MAX, and setting errno to ERANGE on overflows) + everywhere [1], so we cannot use use atol() (or atoll()). + If we could, they would be used, as Atol(), very much like + Strtol() and Strtoul() are used above. + + [1] XXX Configure test needed to check for atol() + (and atoll()) overflow behaviour XXX + + --jhi + + We need to do this the hard way. */ + + nv = Atof(PL_tokenbuf); + + /* See if we can make do with an integer value without loss of + precision. We use U_V to cast to a UV, because some + compilers have issues. Then we try casting it back and see + if it was the same [1]. We only do this if we know we + specifically read an integer. If floatit is true, then we + don't need to do the conversion at all. + + [1] Note that this is lossy if our NVs cannot preserve our + UVs. There are metaconfig defines NV_PRESERVES_UV (a boolean) + and NV_PRESERVES_UV_BITS (a number), but in general we really + do hope all such potentially lossy platforms have strtou?ll? + to do a lossless IV/UV conversion. + + Maybe could do some tricks with DBL_DIG, LDBL_DIG and + DBL_MANT_DIG and LDBL_MANT_DIG (these are already available + as NV_DIG and NV_MANT_DIG)? + + --jhi + */ + { + UV uv = U_V(nv); + if (!floatit && (NV)uv == nv) { + if (uv <= IV_MAX) + sv_setiv(sv, uv); /* Prefer IVs over UVs. */ + else + sv_setuv(sv, uv); + } + else + sv_setnv(sv, nv); } #endif if ( floatit ? (PL_hints & HINT_NEW_FLOAT) : @@ -7072,9 +7193,9 @@ vstring: /* make the op for the constant and return */ if (sv) - yylval.opval = newSVOP(OP_CONST, 0, sv); + lvalp->opval = newSVOP(OP_CONST, 0, sv); else - yylval.opval = Nullop; + lvalp->opval = Nullop; return s; } @@ -7089,12 +7210,12 @@ S_scan_formline(pTHX_ register char *s) bool needargs = FALSE; while (!needargs) { - if (*s == '.' || *s == '}') { + if (*s == '.' || *s == /*{*/'}') { /*SUPPRESS 530*/ #ifdef PERL_STRICT_CR - for (t = s+1;*t == ' ' || *t == '\t'; t++) ; + for (t = s+1;SPACE_OR_TAB(*t); t++) ; #else - for (t = s+1;*t == ' ' || *t == '\t' || *t == '\r'; t++) ; + for (t = s+1;SPACE_OR_TAB(*t) || *t == '\r'; t++) ; #endif if (*t == '\n' || t == PL_bufend) break; @@ -7304,13 +7425,93 @@ Perl_yyerror(pTHX_ char *s) Perl_warn(aTHX_ "%"SVf, msg); else qerror(msg); - if (PL_error_count >= 10) - Perl_croak(aTHX_ "%s has too many errors.\n", CopFILE(PL_curcop)); + if (PL_error_count >= 10) { + if (PL_in_eval && SvCUR(ERRSV)) + Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n", + ERRSV, CopFILE(PL_curcop)); + else + Perl_croak(aTHX_ "%s has too many errors.\n", + CopFILE(PL_curcop)); + } PL_in_my = 0; PL_in_my_stash = Nullhv; return 0; } +STATIC char* +S_swallow_bom(pTHX_ U8 *s) +{ + STRLEN slen; + slen = SvCUR(PL_linestr); + switch (*s) { + case 0xFF: + if (s[1] == 0xFE) { + /* UTF-16 little-endian */ + if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */ + Perl_croak(aTHX_ "Unsupported script encoding"); +#ifndef PERL_NO_UTF16_FILTER + DEBUG_p(PerlIO_printf(Perl_debug_log, "UTF-LE script encoding\n")); + s += 2; + if (PL_bufend > (char*)s) { + U8 *news; + I32 newlen; + + filter_add(utf16rev_textfilter, NULL); + New(898, news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8); + PL_bufend = (char*)utf16_to_utf8_reversed(s, news, + PL_bufend - (char*)s - 1, + &newlen); + Copy(news, s, newlen, U8); + SvCUR_set(PL_linestr, newlen); + PL_bufend = SvPVX(PL_linestr) + newlen; + news[newlen++] = '\0'; + Safefree(news); + } +#else + Perl_croak(aTHX_ "Unsupported script encoding"); +#endif + } + break; + case 0xFE: + if (s[1] == 0xFF) { /* UTF-16 big-endian */ +#ifndef PERL_NO_UTF16_FILTER + DEBUG_p(PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding\n")); + s += 2; + if (PL_bufend > (char *)s) { + U8 *news; + I32 newlen; + + filter_add(utf16_textfilter, NULL); + New(898, news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8); + PL_bufend = (char*)utf16_to_utf8(s, news, + PL_bufend - (char*)s, + &newlen); + Copy(news, s, newlen, U8); + SvCUR_set(PL_linestr, newlen); + PL_bufend = SvPVX(PL_linestr) + newlen; + news[newlen++] = '\0'; + Safefree(news); + } +#else + Perl_croak(aTHX_ "Unsupported script encoding"); +#endif + } + break; + case 0xEF: + if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) { + DEBUG_p(PerlIO_printf(Perl_debug_log, "UTF-8 script encoding\n")); + s += 3; /* UTF-8 */ + } + break; + case 0: + if (slen > 3 && s[1] == 0 && /* UTF-32 big-endian */ + s[2] == 0xFE && s[3] == 0xFF) + { + Perl_croak(aTHX_ "Unsupported script encoding"); + } + } + return (char*)s; +} #ifdef PERL_OBJECT #include "XSUB.h" @@ -7332,3 +7533,43 @@ restore_rsfp(pTHXo_ void *f) PerlIO_close(PL_rsfp); PL_rsfp = fp; } + +#ifndef PERL_NO_UTF16_FILTER +static I32 +utf16_textfilter(pTHXo_ int idx, SV *sv, int maxlen) +{ + I32 count = FILTER_READ(idx+1, sv, maxlen); + if (count) { + U8* tmps; + U8* tend; + I32 newlen; + New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8); + if (!*SvPV_nolen(sv)) + /* Game over, but don't feed an odd-length string to utf16_to_utf8 */ + return count; + + tend = utf16_to_utf8((U8*)SvPVX(sv), tmps, SvCUR(sv), &newlen); + sv_usepvn(sv, (char*)tmps, tend - tmps); + } + return count; +} + +static I32 +utf16rev_textfilter(pTHXo_ int idx, SV *sv, int maxlen) +{ + I32 count = FILTER_READ(idx+1, sv, maxlen); + if (count) { + U8* tmps; + U8* tend; + I32 newlen; + if (!*SvPV_nolen(sv)) + /* Game over, but don't feed an odd-length string to utf16_to_utf8 */ + return count; + + New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8); + tend = utf16_to_utf8_reversed((U8*)SvPVX(sv), tmps, SvCUR(sv), &newlen); + sv_usepvn(sv, (char*)tmps, tend - tmps); + } + return count; +} +#endif